Merge branch 'master' into malformed-manifest-fix

This commit is contained in:
nixxo 2023-01-15 21:20:04 +01:00
commit f72088f1e0
No known key found for this signature in database
GPG key ID: E0DE62EF9A9BFAB2
97 changed files with 4548 additions and 914 deletions

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm reporting a broken site - label: I'm reporting a broken site
required: true required: true
- label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -62,7 +62,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -70,8 +70,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2022.11.11, Current version: 2022.11.11 Latest version: 2023.01.06, Current version: 2023.01.06
yt-dlp is up to date (2022.11.11) yt-dlp is up to date (2023.01.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm reporting a new site support request - label: I'm reporting a new site support request
required: true required: true
- label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -74,7 +74,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -82,8 +82,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2022.11.11, Current version: 2022.11.11 Latest version: 2023.01.06, Current version: 2023.01.06
yt-dlp is up to date (2022.11.11) yt-dlp is up to date (2023.01.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm requesting a site-specific feature - label: I'm requesting a site-specific feature
required: true required: true
- label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -70,7 +70,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -78,8 +78,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2022.11.11, Current version: 2022.11.11 Latest version: 2023.01.06, Current version: 2023.01.06
yt-dlp is up to date (2022.11.11) yt-dlp is up to date (2023.01.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm reporting a bug unrelated to a specific site - label: I'm reporting a bug unrelated to a specific site
required: true required: true
- label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -55,7 +55,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -63,8 +63,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2022.11.11, Current version: 2022.11.11 Latest version: 2023.01.06, Current version: 2023.01.06
yt-dlp is up to date (2022.11.11) yt-dlp is up to date (2023.01.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -20,7 +20,7 @@ body:
required: true required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true required: true
- label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
required: true required: true
@ -51,7 +51,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -59,7 +59,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2022.11.11, Current version: 2022.11.11 Latest version: 2023.01.06, Current version: 2023.01.06
yt-dlp is up to date (2022.11.11) yt-dlp is up to date (2023.01.06)
<more lines> <more lines>
render: shell render: shell

View file

@ -26,7 +26,7 @@ body:
required: true required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true required: true
- label: I've verified that I'm running yt-dlp version **2022.11.11** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
required: true required: true
@ -57,7 +57,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2022.11.11 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -65,7 +65,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2022.11.11, Current version: 2022.11.11 Latest version: 2023.01.06, Current version: 2023.01.06
yt-dlp is up to date (2022.11.11) yt-dlp is up to date (2023.01.06)
<more lines> <more lines>
render: shell render: shell

View file

@ -2,8 +2,6 @@
### Description of your *pull request* and other information ### Description of your *pull request* and other information
</details>
<!-- <!--
Explanation of your *pull request* in arbitrary form goes here. Please **make sure the description explains the purpose and effect** of your *pull request* and is worded well enough to be understood. Provide as much **context and examples** as possible Explanation of your *pull request* in arbitrary form goes here. Please **make sure the description explains the purpose and effect** of your *pull request* and is worded well enough to be understood. Provide as much **context and examples** as possible
@ -41,3 +39,5 @@ ### What is the purpose of your *pull request*?
- [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy)) - [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy))
- [ ] Core bug fix/improvement - [ ] Core bug fix/improvement
- [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes)) - [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes))
</details>

9
.gitignore vendored
View file

@ -30,6 +30,7 @@ cookies
*.f4v *.f4v
*.flac *.flac
*.flv *.flv
*.gif
*.jpeg *.jpeg
*.jpg *.jpg
*.m4a *.m4a
@ -120,9 +121,5 @@ yt-dlp.zip
*/extractor/lazy_extractors.py */extractor/lazy_extractors.py
# Plugins # Plugins
ytdlp_plugins/extractor/* ytdlp_plugins/
!ytdlp_plugins/extractor/__init__.py yt-dlp-plugins
!ytdlp_plugins/extractor/sample.py
ytdlp_plugins/postprocessor/*
!ytdlp_plugins/postprocessor/__init__.py
!ytdlp_plugins/postprocessor/sample.py

View file

@ -3,6 +3,7 @@ shirt-dev (collaborator)
coletdjnz/colethedj (collaborator) coletdjnz/colethedj (collaborator)
Ashish0804 (collaborator) Ashish0804 (collaborator)
nao20010128nao/Lesmiscore (collaborator) nao20010128nao/Lesmiscore (collaborator)
bashonly (collaborator)
h-h-h-h h-h-h-h
pauldubois98 pauldubois98
nixxo nixxo
@ -295,7 +296,6 @@ Mehavoid
winterbird-code winterbird-code
yashkc2025 yashkc2025
aldoridhoni aldoridhoni
bashonly
jacobtruman jacobtruman
masta79 masta79
palewire palewire
@ -357,3 +357,27 @@ SG5
the-marenga the-marenga
tkgmomosheep tkgmomosheep
vitkhab vitkhab
glensc
synthpop123
tntmod54321
milkknife
Bnyro
CapacitorSet
stelcodes
skbeh
muddi900
digitall
chengzhicn
mexus
JChris246
redraskal
Spicadox
barsnick
docbender
KurtBestor
Chrissi2812
FrederikNS
gschizas
JC-Chung
mzhou
OndrejBakan

View file

@ -11,6 +11,157 @@ # Instructions for creating release
--> -->
### 2023.01.06
* Fix config locations by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [downloader/aria2c] Disable native progress
* [utils] `mimetype2ext`: `weba` is not standard
* [utils] `windows_enable_vt_mode`: Better error handling
* [build] Add minimal `pyproject.toml`
* [update] Fix updater file removal on windows by [Grub4K](https://github.com/Grub4K)
* [cleanup] Misc fixes and cleanup
* [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS)
* [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246)
* [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou)
* [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812)
* [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan)
* [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman)
* [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas)
* [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung)
### 2023.01.02
* **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
* Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information
* Add `--compat-options 2021,2022`
* This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put `--compat 2022` in your config to guard against future compat changes.
* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
* Merge youtube-dl: Upto [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
* Add pre-processor stage `video`
* Let `--parse/replace-in-metadata` run at any post-processing stage
* Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz)
* Add new field `aspect_ratio`
* Add `ac4` to known codecs
* Add `weba` to known extensions
* [FFmpegVideoConvertor] Add `gif` to `--recode-video`
* Add message when there are no subtitles/thumbnails
* Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore)
* Make early reject of `--match-filter` stricter
* Fix `--cookies-from-browser` CLI parsing
* Fix `original_url` in playlists
* Fix bug in writing playlist info-json
* Fix bugs in `PlaylistEntries`
* [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor] Add a way to distinguish IEs that return only videos
* [extractor] Implement universal format sorting and deprecate `_sort_formats`
* [extractor] Let `_extract_format` functions obey `--ignore-no-formats`
* [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly)
* [extractor/generic] Don't report redirect to https
* [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz)
* [FormatSort] Add `mov` to `vext`
* [jsinterp] Escape regex that looks like nested set
* [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger)
* [utils] `classproperty`: Add cache support
* [utils] `get_exe_version`: Detect broken executables by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan)
* [utils] Make `ExtractorError` mutable
* [utils] Move `FileDownloader.parse_bytes` into utils
* [utils] Move format sorting code into `utils`
* [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K)
* [update] Workaround [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632)
* [docs] Improvements
* [cleanup] Misc fixes and cleanup
* [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz)
* [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h)
* [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan)
* [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc), [pukkandan](https://github.com/pukkandan)
* [extractor/kankanews] Add extractor by [synthpop123](https://github.com/synthpop123)
* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly)
* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger)
* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly)
* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321)
* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly)
* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife)
* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly)
* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123)
* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro)
* [extractor/youtube] Consider language in format de-duplication
* [extractor/youtube] Extract DRC formats
* [extractor/youtube] Fix `ytuser:`
* [extractor/youtube] Fix bug in handling of music URLs
* [extractor/youtube] Subtitles cannot be translated to `und`
* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz)
* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet)
* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow)
* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes)
* [extractor/bilibili] Fix `--no-playlist` for anthology
* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh)
* [extractor/biliintl:series] Make partial download of series faster
* [extractor/BiliLive] Fix extractor
* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction
* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai)
* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda)
* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn)
* [extractor/embedly] Handle vimeo embeds
* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf)
* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly)
* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900)
* [extractor/hotstar] Improve format metadata
* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly)
* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo)
* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo)
* [extractor/mixch] Support `--wait-for-video`
* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by [bashonly](https://github.com/bashonly)
* [extractor/naver] Treat fan subtitles as separate language
* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly)
* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly)
* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall)
* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai)
* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn)
* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly)
* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly)
* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus)
* [extractor/sibnet] Separate from VKIE
* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop)
* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246)
* [extractor/stv] Detect DRM
* [extractor/swearnet] Fix description bug
* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0)
* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly)
* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal)
* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly)
* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo)
* [extractor/udemy] Fix lectures that have no URL and detect DRM
* [extractor/unsupported] Add more URLs
* [extractor/urplay] Support for audio-only formats by [barsnick](https://github.com/barsnick)
* [extractor/wistia] Improve extension detection by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender)
* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor)
* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell)
* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd)
### 2022.11.11 ### 2022.11.11
* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128) * Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)

View file

@ -42,7 +42,7 @@ ## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>
* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc * Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc
## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao) ## [Lesmiscore](https://github.com/Lesmiscore) <sub><sup>(nao20010128nao)</sup></sub>
**Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s **Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr **Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr
@ -50,3 +50,10 @@ ## [Lesmiscore](https://github.com/Lesmiscore) (nao20010128nao)
* Download live from start to end for YouTube * Download live from start to end for YouTube
* Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc * Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc
* Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc * Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc
## [bashonly](https://github.com/bashonly)
* `--cookies-from-browser` support for Firefox containers
* Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc
* Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc

View file

@ -17,8 +17,8 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
clean-test: clean-test:
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \
*.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist: clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap

171
README.md
View file

@ -10,7 +10,7 @@
[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord")
[![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites") [![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites")
[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License")
[![CI Status](https://img.shields.io/github/workflow/status/yt-dlp/yt-dlp/Core%20Tests/master?label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") [![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") [![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
@ -61,6 +61,8 @@
* [Modifying metadata examples](#modifying-metadata-examples) * [Modifying metadata examples](#modifying-metadata-examples)
* [EXTRACTOR ARGUMENTS](#extractor-arguments) * [EXTRACTOR ARGUMENTS](#extractor-arguments)
* [PLUGINS](#plugins) * [PLUGINS](#plugins)
* [Installing Plugins](#installing-plugins)
* [Developing Plugins](#developing-plugins)
* [EMBEDDING YT-DLP](#embedding-yt-dlp) * [EMBEDDING YT-DLP](#embedding-yt-dlp)
* [Embedding examples](#embedding-examples) * [Embedding examples](#embedding-examples)
* [DEPRECATED OPTIONS](#deprecated-options) * [DEPRECATED OPTIONS](#deprecated-options)
@ -74,13 +76,13 @@
# NEW FEATURES # NEW FEATURES
* Merged with **youtube-dl v2021.12.17+ [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)** <!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * Merged with **youtube-dl v2021.12.17+ [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f)** <!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. * **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
* **YouTube improvements**: * **YouTube improvements**:
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
@ -151,12 +153,15 @@ ### Differences in default behavior
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
For ease of use, a few more compat options are available: For ease of use, a few more compat options are available:
* `--compat-options all`: Use all compat options (Do NOT use) * `--compat-options all`: Use all compat options (Do NOT use)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options
# INSTALLATION # INSTALLATION
@ -179,7 +184,7 @@ ## UPDATE
If you [installed with PIP](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program If you [installed with PIP](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) or refer to their documentation For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation
<!-- MANPAGE: BEGIN EXCLUDED SECTION --> <!-- MANPAGE: BEGIN EXCLUDED SECTION -->
@ -217,7 +222,7 @@ #### Misc
<!-- MANPAGE: END EXCLUDED SECTION --> <!-- MANPAGE: END EXCLUDED SECTION -->
Note: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) **Note**: The manpages, shell completion files etc. are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
## DEPENDENCIES ## DEPENDENCIES
Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
@ -233,8 +238,9 @@ ### Strongly recommended
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
<!-- TODO: ffmpeg has merged this patch. Remove this note once there is new release --> There are bugs in ffmpeg that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
**Important**: What you need is ffmpeg *binary*, **NOT** [the python package of the same name](https://pypi.org/project/ffmpeg)
### Networking ### Networking
* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE) * [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
@ -281,7 +287,7 @@ ### Standalone PyInstaller Builds
`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). `pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. **Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. **Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
@ -414,6 +420,8 @@ ## Network Options:
--source-address IP Client-side IP address to bind to --source-address IP Client-side IP address to bind to
-4, --force-ipv4 Make all connections via IPv4 -4, --force-ipv4 Make all connections via IPv4
-6, --force-ipv6 Make all connections via IPv6 -6, --force-ipv6 Make all connections via IPv6
--enable-file-urls Enable file:// URLs. This is disabled by
default for security reasons.
## Geo-restriction: ## Geo-restriction:
--geo-verification-proxy URL Use this proxy to verify the IP address for --geo-verification-proxy URL Use this proxy to verify the IP address for
@ -448,7 +456,9 @@ ## Video Selection:
--date DATE Download only videos uploaded on this date. --date DATE Download only videos uploaded on this date.
The date can be "YYYYMMDD" or in the format The date can be "YYYYMMDD" or in the format
[now|today|yesterday][-N[day|week|month|year]]. [now|today|yesterday][-N[day|week|month|year]].
E.g. --date today-2weeks E.g. "--date today-2weeks" downloads
only videos uploaded on the same day two
weeks ago
--datebefore DATE Download only videos uploaded on or before --datebefore DATE Download only videos uploaded on or before
this date. The date formats accepted are the this date. The date formats accepted are the
same as --date same as --date
@ -525,8 +535,8 @@ ## Download Options:
linear=1::2 --retry-sleep fragment:exp=1:20 linear=1::2 --retry-sleep fragment:exp=1:20
--skip-unavailable-fragments Skip unavailable fragments for DASH, --skip-unavailable-fragments Skip unavailable fragments for DASH,
hlsnative and ISM downloads (default) hlsnative and ISM downloads (default)
(Alias: --no-abort-on-unavailable-fragment) (Alias: --no-abort-on-unavailable-fragments)
--abort-on-unavailable-fragment --abort-on-unavailable-fragments
Abort download if a fragment is unavailable Abort download if a fragment is unavailable
(Alias: --no-skip-unavailable-fragments) (Alias: --no-skip-unavailable-fragments)
--keep-fragments Keep downloaded fragments on disk after --keep-fragments Keep downloaded fragments on disk after
@ -725,7 +735,7 @@ ## Verbosity and Simulation Options:
screen, optionally prefixed with when to screen, optionally prefixed with when to
print it, separated by a ":". Supported print it, separated by a ":". Supported
values of "WHEN" are the same as that of values of "WHEN" are the same as that of
--use-postprocessor, and "video" (default). --use-postprocessor (default: video).
Implies --quiet. Implies --simulate unless Implies --quiet. Implies --simulate unless
--no-simulate or later stages of WHEN are --no-simulate or later stages of WHEN are
used. This option can be used multiple times used. This option can be used multiple times
@ -952,13 +962,18 @@ ## Post-Processing Options:
mkv/mka video files mkv/mka video files
--no-embed-info-json Do not embed the infojson as an attachment --no-embed-info-json Do not embed the infojson as an attachment
to the video file to the video file
--parse-metadata FROM:TO Parse additional metadata like title/artist --parse-metadata [WHEN:]FROM:TO
Parse additional metadata like title/artist
from other fields; see "MODIFYING METADATA" from other fields; see "MODIFYING METADATA"
for details for details. Supported values of "WHEN" are
--replace-in-metadata FIELDS REGEX REPLACE the same as that of --use-postprocessor
(default: pre_process)
--replace-in-metadata [WHEN:]FIELDS REGEX REPLACE
Replace text in a metadata field using the Replace text in a metadata field using the
given regex. This option can be used given regex. This option can be used
multiple times multiple times. Supported values of "WHEN"
are the same as that of --use-postprocessor
(default: pre_process)
--xattrs Write metadata to the video file's xattrs --xattrs Write metadata to the video file's xattrs
(using dublin core and xdg standards) (using dublin core and xdg standards)
--concat-playlist POLICY Concatenate videos in a playlist. One of --concat-playlist POLICY Concatenate videos in a playlist. One of
@ -979,18 +994,18 @@ ## Post-Processing Options:
--ffmpeg-location PATH Location of the ffmpeg binary; either the --ffmpeg-location PATH Location of the ffmpeg binary; either the
path to the binary or its containing directory path to the binary or its containing directory
--exec [WHEN:]CMD Execute a command, optionally prefixed with --exec [WHEN:]CMD Execute a command, optionally prefixed with
when to execute it (after_move if when to execute it, separated by a ":".
unspecified), separated by a ":". Supported Supported values of "WHEN" are the same as
values of "WHEN" are the same as that of that of --use-postprocessor (default:
--use-postprocessor. Same syntax as the after_move). Same syntax as the output
output template can be used to pass any template can be used to pass any field as
field as arguments to the command. After arguments to the command. After download, an
download, an additional field "filepath" additional field "filepath" that contains
that contains the final path of the the final path of the downloaded file is
downloaded file is also available, and if no also available, and if no fields are passed,
fields are passed, %(filepath)q is appended %(filepath,_filename|)q is appended to the
to the end of the command. This option can end of the command. This option can be used
be used multiple times multiple times
--no-exec Remove any previously defined --exec --no-exec Remove any previously defined --exec
--convert-subs FORMAT Convert the subtitles to another format --convert-subs FORMAT Convert the subtitles to another format
(currently supported: ass, lrc, srt, vtt) (currently supported: ass, lrc, srt, vtt)
@ -1028,14 +1043,16 @@ ## Post-Processing Options:
postprocessor is invoked. It can be one of postprocessor is invoked. It can be one of
"pre_process" (after video extraction), "pre_process" (after video extraction),
"after_filter" (after video passes filter), "after_filter" (after video passes filter),
"before_dl" (before each video download), "video" (after --format; before
"post_process" (after each video download; --print/--output), "before_dl" (before each
default), "after_move" (after moving video video download), "post_process" (after each
file to its final locations), "after_video" video download; default), "after_move"
(after downloading and processing all (after moving video file to its final
formats of a video), or "playlist" (at end locations), "after_video" (after downloading
of playlist). This option can be used and processing all formats of a video), or
multiple times to add different postprocessors "playlist" (at end of playlist). This option
can be used multiple times to add different
postprocessors
## SponsorBlock Options: ## SponsorBlock Options:
Make chapter entries for, or remove various segments (sponsor, Make chapter entries for, or remove various segments (sponsor,
@ -1102,16 +1119,22 @@ # CONFIGURATION
* `yt-dlp.conf` in the home path given by `-P` * `yt-dlp.conf` in the home path given by `-P`
* If `-P` is not given, the current directory is searched * If `-P` is not given, the current directory is searched
1. **User Configuration**: 1. **User Configuration**:
* `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
* `${XDG_CONFIG_HOME}/yt-dlp.conf` * `${XDG_CONFIG_HOME}/yt-dlp.conf`
* `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
* `${XDG_CONFIG_HOME}/yt-dlp/config.txt`
* `${APPDATA}/yt-dlp.conf`
* `${APPDATA}/yt-dlp/config` (recommended on Windows) * `${APPDATA}/yt-dlp/config` (recommended on Windows)
* `${APPDATA}/yt-dlp/config.txt` * `${APPDATA}/yt-dlp/config.txt`
* `~/yt-dlp.conf` * `~/yt-dlp.conf`
* `~/yt-dlp.conf.txt` * `~/yt-dlp.conf.txt`
* `~/.yt-dlp/config`
* `~/.yt-dlp/config.txt`
See also: [Notes about environment variables](#notes-about-environment-variables) See also: [Notes about environment variables](#notes-about-environment-variables)
1. **System Configuration**: 1. **System Configuration**:
* `/etc/yt-dlp.conf` * `/etc/yt-dlp.conf`
* `/etc/yt-dlp/config`
* `/etc/yt-dlp/config.txt`
E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
``` ```
@ -1130,7 +1153,7 @@ # Save all videos under YouTube directory in your home directory
-o ~/YouTube/%(title)s.%(ext)s -o ~/YouTube/%(title)s.%(ext)s
``` ```
Note that options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. **Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell.
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
@ -1206,7 +1229,7 @@ # OUTPUT TEMPLATE
<a id="outtmpl-postprocess-note"></a> <a id="outtmpl-postprocess-note"></a>
Note: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. **Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete.
The available fields are: The available fields are:
@ -1327,7 +1350,7 @@ # OUTPUT TEMPLATE
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). **Note**: Some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL **Tip**: Look at the `-j` output to identify which fields are available for the particular URL
@ -1468,7 +1491,7 @@ ## Filtering Formats
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`. Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. **Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
@ -1721,7 +1744,7 @@ # EXTRACTOR ARGUMENTS
The following extractors use this feature: The following extractors use this feature:
#### youtube #### youtube
* `lang`: Language code to prefer translated metadata of this language (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
@ -1775,26 +1798,78 @@ #### rokfinchannel
#### twitter #### twitter
* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided * `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
NOTE: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE --> <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
# PLUGINS # PLUGINS
Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`; where `<root-dir>` is the directory of the binary (`<root-dir>/yt-dlp`), or the root directory of the module if you are running directly from source-code (`<root dir>/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!**
Plugins can be of `<type>`s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`. Plugins can be of `<type>`s `extractor` or `postprocessor`.
- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it.
- Extractor plugins take priority over builtin extractors.
- Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
See [ytdlp_plugins](ytdlp_plugins) for example plugins.
Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`.
If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability In other words, the file structure on the disk looks something like:
yt_dlp_plugins/
extractor/
myplugin.py
postprocessor/
myplugin.py
yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
## Installing Plugins
Plugins can be installed using various methods and locations.
1. **Configuration directories**:
Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration):
* **User Plugins**
* `${XDG_CONFIG_HOME}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Linux/macOS)
* `${XDG_CONFIG_HOME}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
* `${APPDATA}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Windows)
* `${APPDATA}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
* `~/.yt-dlp/plugins/<package name>/yt_dlp_plugins/`
* `~/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
* **System Plugins**
* `/etc/yt-dlp/plugins/<package name>/yt_dlp_plugins/`
* `/etc/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location:
* Binary: where `<root-dir>/yt-dlp.exe`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
* Source: where `<root-dir>/yt_dlp/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
3. **pip and other locations in `PYTHONPATH`**
* Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
* Note: plugin files between plugin packages installed with pip must have unique filenames.
* Every path in `PYTHONPATH` is searched for a `yt_dlp_plugins` namespace folder.
* Note: This does not apply for Pyinstaller/py2exe builds.
`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`
Run yt-dlp with `--verbose` to check if the plugin has been loaded.
## Developing Plugins
See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).
To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability.
See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor.
# EMBEDDING YT-DLP # EMBEDDING YT-DLP

View file

@ -40,8 +40,12 @@ def main():
_ALL_CLASSES = get_all_ies() # Must be before import _ALL_CLASSES = get_all_ies() # Must be before import
import yt_dlp.plugins
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
# Filter out plugins
_ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]
DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
module_src = '\n'.join(( module_src = '\n'.join((
MODULE_TEMPLATE, MODULE_TEMPLATE,

5
pyproject.toml Normal file
View file

@ -0,0 +1,5 @@
[build-system]
build-backend = 'setuptools.build_meta'
# https://github.com/yt-dlp/yt-dlp/issues/5941
# https://github.com/pypa/distutils/issues/17
requires = ['setuptools > 50']

View file

@ -26,7 +26,7 @@ markers =
[tox:tox] [tox:tox]
skipsdist = true skipsdist = true
envlist = py{36,37,38,39,310},pypy{36,37,38,39} envlist = py{36,37,38,39,310,311},pypy{36,37,38,39}
skip_missing_interpreters = true skip_missing_interpreters = true
[testenv] # tox [testenv] # tox

View file

@ -1,8 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os.path # Allow execution from anywhere
import subprocess import os
import sys import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import subprocess
import warnings import warnings
try: try:

View file

@ -51,6 +51,8 @@ # Supported sites
- **afreecatv:live**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com - **afreecatv:live**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com
- **afreecatv:user** - **afreecatv:user**
- **AirMozilla** - **AirMozilla**
- **AirTV**
- **AitubeKZVideo**
- **AliExpressLive** - **AliExpressLive**
- **AlJazeera** - **AlJazeera**
- **Allocine** - **Allocine**
@ -60,6 +62,10 @@ # Supported sites
- **Alura**: [<abbr title="netrc machine"><em>alura</em></abbr>] - **Alura**: [<abbr title="netrc machine"><em>alura</em></abbr>]
- **AluraCourse**: [<abbr title="netrc machine"><em>aluracourse</em></abbr>] - **AluraCourse**: [<abbr title="netrc machine"><em>aluracourse</em></abbr>]
- **Amara** - **Amara**
- **AmazonMiniTV**
- **amazonminitv:season**: Amazon MiniTV Series, "minitv:season:" prefix
- **amazonminitv:series**
- **AmazonReviews**
- **AmazonStore** - **AmazonStore**
- **AMCNetworks** - **AMCNetworks**
- **AmericasTestKitchen** - **AmericasTestKitchen**
@ -130,6 +136,8 @@ # Supported sites
- **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>] - **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
- **BBVTVLive**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>] - **BBVTVLive**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
- **BBVTVRecordings**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>] - **BBVTVRecordings**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
- **BeatBumpPlaylist**
- **BeatBumpVideo**
- **Beatport** - **Beatport**
- **Beeg** - **Beeg**
- **BehindKink** - **BehindKink**
@ -157,7 +165,7 @@ # Supported sites
- **BilibiliSpacePlaylist** - **BilibiliSpacePlaylist**
- **BilibiliSpaceVideo** - **BilibiliSpaceVideo**
- **BiliIntl**: [<abbr title="netrc machine"><em>biliintl</em></abbr>] - **BiliIntl**: [<abbr title="netrc machine"><em>biliintl</em></abbr>]
- **BiliIntlSeries**: [<abbr title="netrc machine"><em>biliintl</em></abbr>] - **biliIntl:series**: [<abbr title="netrc machine"><em>biliintl</em></abbr>]
- **BiliLive** - **BiliLive**
- **BioBioChileTV** - **BioBioChileTV**
- **Biography** - **Biography**
@ -345,6 +353,8 @@ # Supported sites
- **DrTuber** - **DrTuber**
- **drtv** - **drtv**
- **drtv:live** - **drtv:live**
- **drtv:season**
- **drtv:series**
- **DTube** - **DTube**
- **duboku**: www.duboku.io - **duboku**: www.duboku.io
- **duboku:list**: www.duboku.io entire series - **duboku:list**: www.duboku.io entire series
@ -387,6 +397,7 @@ # Supported sites
- **ESPNCricInfo** - **ESPNCricInfo**
- **EsriVideo** - **EsriVideo**
- **Europa** - **Europa**
- **EuroParlWebstream**
- **EuropeanTour** - **EuropeanTour**
- **Eurosport** - **Eurosport**
- **EUScreen** - **EUScreen**
@ -599,6 +610,8 @@ # Supported sites
- **JWPlatform** - **JWPlatform**
- **Kakao** - **Kakao**
- **Kaltura** - **Kaltura**
- **Kanal2**
- **KankaNews**
- **Karaoketv** - **Karaoketv**
- **KarriereVideos** - **KarriereVideos**
- **Katsomo** - **Katsomo**
@ -607,8 +620,10 @@ # Supported sites
- **Ketnet** - **Ketnet**
- **khanacademy** - **khanacademy**
- **khanacademy:unit** - **khanacademy:unit**
- **Kick**
- **Kicker** - **Kicker**
- **KickStarter** - **KickStarter**
- **KickVOD**
- **KinjaEmbed** - **KinjaEmbed**
- **KinoPoisk** - **KinoPoisk**
- **KompasVideo** - **KompasVideo**
@ -709,6 +724,7 @@ # Supported sites
- **Mediasite** - **Mediasite**
- **MediasiteCatalog** - **MediasiteCatalog**
- **MediasiteNamedCatalog** - **MediasiteNamedCatalog**
- **MediaStream**
- **MediaWorksNZVOD** - **MediaWorksNZVOD**
- **Medici** - **Medici**
- **megaphone.fm**: megaphone.fm embedded players - **megaphone.fm**: megaphone.fm embedded players
@ -845,6 +861,7 @@ # Supported sites
- **NetPlusTVRecordings**: [<abbr title="netrc machine"><em>netplus</em></abbr>] - **NetPlusTVRecordings**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
- **Netverse** - **Netverse**
- **NetversePlaylist** - **NetversePlaylist**
- **NetverseSearch**: "netsearch:" prefix
- **Netzkino** - **Netzkino**
- **Newgrounds** - **Newgrounds**
- **Newgrounds:playlist** - **Newgrounds:playlist**
@ -887,6 +904,7 @@ # Supported sites
- **njoy:embed** - **njoy:embed**
- **NJPWWorld**: [<abbr title="netrc machine"><em>njpwworld</em></abbr>] 新日本プロレスワールド - **NJPWWorld**: [<abbr title="netrc machine"><em>njpwworld</em></abbr>] 新日本プロレスワールド
- **NobelPrize** - **NobelPrize**
- **NoicePodcast**
- **NonkTube** - **NonkTube**
- **NoodleMagazine** - **NoodleMagazine**
- **Noovo** - **Noovo**
@ -933,6 +951,7 @@ # Supported sites
- **on24**: ON24 - **on24**: ON24
- **OnDemandKorea** - **OnDemandKorea**
- **OneFootball** - **OneFootball**
- **OnePlacePodcast**
- **onet.pl** - **onet.pl**
- **onet.tv** - **onet.tv**
- **onet.tv:channel** - **onet.tv:channel**
@ -1022,11 +1041,13 @@ # Supported sites
- **PokerGoCollection**: [<abbr title="netrc machine"><em>pokergo</em></abbr>] - **PokerGoCollection**: [<abbr title="netrc machine"><em>pokergo</em></abbr>]
- **PolsatGo** - **PolsatGo**
- **PolskieRadio** - **PolskieRadio**
- **polskieradio:audition**
- **polskieradio:category**
- **polskieradio:kierowcow** - **polskieradio:kierowcow**
- **polskieradio:legacy**
- **polskieradio:player** - **polskieradio:player**
- **polskieradio:podcast** - **polskieradio:podcast**
- **polskieradio:podcast:list** - **polskieradio:podcast:list**
- **PolskieRadioCategory**
- **Popcorntimes** - **Popcorntimes**
- **PopcornTV** - **PopcornTV**
- **PornCom** - **PornCom**
@ -1155,6 +1176,7 @@ # Supported sites
- **rtvslo.si** - **rtvslo.si**
- **RUHD** - **RUHD**
- **Rule34Video** - **Rule34Video**
- **Rumble**
- **RumbleChannel** - **RumbleChannel**
- **RumbleEmbed** - **RumbleEmbed**
- **Ruptly** - **Ruptly**
@ -1189,6 +1211,7 @@ # Supported sites
- **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix
- **Screen9** - **Screen9**
- **Screencast** - **Screencast**
- **Screencastify**
- **ScreencastOMatic** - **ScreencastOMatic**
- **ScrippsNetworks** - **ScrippsNetworks**
- **scrippsnetworks:watch** - **scrippsnetworks:watch**
@ -1212,6 +1235,7 @@ # Supported sites
- **ShugiinItvLive**: 衆議院インターネット審議中継 - **ShugiinItvLive**: 衆議院インターネット審議中継
- **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継) - **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継)
- **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ) - **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ)
- **SibnetEmbed**
- **simplecast** - **simplecast**
- **simplecast:episode** - **simplecast:episode**
- **simplecast:podcast** - **simplecast:podcast**
@ -1227,7 +1251,7 @@ # Supported sites
- **skynewsarabia:video** - **skynewsarabia:video**
- **SkyNewsAU** - **SkyNewsAU**
- **Slideshare** - **Slideshare**
- **SlidesLive**: (**Currently broken**) - **SlidesLive**
- **Slutload** - **Slutload**
- **Smotrim** - **Smotrim**
- **Snotr** - **Snotr**
@ -1241,6 +1265,7 @@ # Supported sites
- **soundcloud:set**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:set**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
- **soundcloud:trackstation**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:trackstation**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
- **soundcloud:user**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] - **soundcloud:user**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
- **soundcloud:user:permalink**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
- **SoundcloudEmbed** - **SoundcloudEmbed**
- **soundgasm** - **soundgasm**
- **soundgasm:profile** - **soundgasm:profile**
@ -1352,10 +1377,14 @@ # Supported sites
- **ThisAmericanLife** - **ThisAmericanLife**
- **ThisAV** - **ThisAV**
- **ThisOldHouse** - **ThisOldHouse**
- **ThisVid**
- **ThisVidMember**
- **ThisVidPlaylist**
- **ThreeSpeak** - **ThreeSpeak**
- **ThreeSpeakUser** - **ThreeSpeakUser**
- **TikTok** - **TikTok**
- **tiktok:effect**: (**Currently broken**) - **tiktok:effect**: (**Currently broken**)
- **tiktok:live**
- **tiktok:sound**: (**Currently broken**) - **tiktok:sound**: (**Currently broken**)
- **tiktok:tag**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**)
- **tiktok:user**: (**Currently broken**) - **tiktok:user**: (**Currently broken**)
@ -1383,6 +1412,7 @@ # Supported sites
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
- **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix
- **TrovoVod** - **TrovoVod**
- **TrtCocukVideo**
- **TrueID** - **TrueID**
- **TruNews** - **TruNews**
- **Truth** - **Truth**
@ -1483,6 +1513,7 @@ # Supported sites
- **VeeHD** - **VeeHD**
- **Veo** - **Veo**
- **Veoh** - **Veoh**
- **veoh:user**
- **Vesti**: Вести.Ru - **Vesti**: Вести.Ru
- **Vevo** - **Vevo**
- **VevoPlaylist** - **VevoPlaylist**
@ -1502,6 +1533,11 @@ # Supported sites
- **video.sky.it:live** - **video.sky.it:live**
- **VideoDetective** - **VideoDetective**
- **videofy.me** - **videofy.me**
- **VideoKen**
- **VideoKenCategory**
- **VideoKenPlayer**
- **VideoKenPlaylist**
- **VideoKenTopic**
- **videomore** - **videomore**
- **videomore:season** - **videomore:season**
- **videomore:video** - **videomore:video**
@ -1521,6 +1557,7 @@ # Supported sites
- **vimeo:group**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] - **vimeo:group**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
- **vimeo:likes**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo user likes - **vimeo:likes**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo user likes
- **vimeo:ondemand**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] - **vimeo:ondemand**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
- **vimeo:pro**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
- **vimeo:review**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Review pages on vimeo - **vimeo:review**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Review pages on vimeo
- **vimeo:user**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] - **vimeo:user**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
- **vimeo:watchlater**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) - **vimeo:watchlater**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)
@ -1549,6 +1586,7 @@ # Supported sites
- **VoiceRepublic** - **VoiceRepublic**
- **voicy** - **voicy**
- **voicy:channel** - **voicy:channel**
- **VolejTV**
- **Voot** - **Voot**
- **VootSeries** - **VootSeries**
- **VoxMedia** - **VoxMedia**
@ -1591,6 +1629,7 @@ # Supported sites
- **WDRElefant** - **WDRElefant**
- **WDRPage** - **WDRPage**
- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
- **Webcamerapl**
- **Webcaster** - **Webcaster**
- **WebcasterFeed** - **WebcasterFeed**
- **WebOfStories** - **WebOfStories**
@ -1604,6 +1643,7 @@ # Supported sites
- **wikimedia.org** - **wikimedia.org**
- **Willow** - **Willow**
- **WimTV** - **WimTV**
- **WinSportsVideo**
- **Wistia** - **Wistia**
- **WistiaChannel** - **WistiaChannel**
- **WistiaPlaylist** - **WistiaPlaylist**
@ -1618,6 +1658,7 @@ # Supported sites
- **WWE** - **WWE**
- **wyborcza:video** - **wyborcza:video**
- **WyborczaPodcast** - **WyborczaPodcast**
- **Xanimu**
- **XBef** - **XBef**
- **XboxClips** - **XboxClips**
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing

227
test/test_config.py Normal file
View file

@ -0,0 +1,227 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
import unittest.mock
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import contextlib
import itertools
from pathlib import Path
from yt_dlp.compat import compat_expanduser
from yt_dlp.options import create_parser, parseOpts
from yt_dlp.utils import Config, get_executable_path
# Baseline environment applied by `set_environ`. A value of None means the
# variable is removed from the environment while the context is active.
ENVIRON_DEFAULTS = {
    'HOME': None,
    'XDG_CONFIG_HOME': '/_xdg_config_home/',
    'USERPROFILE': 'C:/Users/testing/',
    'APPDATA': 'C:/Users/testing/AppData/Roaming/',
    'HOMEDRIVE': 'C:/',
    'HOMEPATH': 'Users/testing/',
}


@contextlib.contextmanager
def set_environ(**kwargs):
    """Temporarily replace environment variables for the duration of a block.

    The effective overrides are `ENVIRON_DEFAULTS` updated with `kwargs`.
    A value of None removes the variable; any other value is assigned to
    `os.environ` as-is. Usable both as a context manager and, through
    `contextlib.contextmanager`, as a decorator.

    The complete previous environment is restored on exit, including when
    the wrapped code raises (e.g. a failing assertion), so that one failing
    test cannot leak its environment into the tests that run after it.
    """
    saved_environ = os.environ.copy()

    try:
        # Applying the overrides is inside the `try` so that a failure half-way
        # through (e.g. a non-string value) still rolls the environment back
        for name, value in {**ENVIRON_DEFAULTS, **kwargs}.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value

        yield
    finally:
        os.environ.clear()
        os.environ.update(saved_environ)
def _generate_expected_groups():
    """Build the config file locations expected for the current environment.

    Returns a dict mapping each group name ('Portable', 'Home', 'User',
    'System') to an ordered list of `Path` objects. The APPDATA-based
    entries of the 'User' group are only present when the `appdata`
    environment variable is set to a non-empty value.
    """
    xdg_dir = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    appdata = os.getenv('appdata')
    home = compat_expanduser('~')

    user_paths = [
        Path(xdg_dir, 'yt-dlp.conf'),
        Path(xdg_dir, 'yt-dlp', 'config'),
        Path(xdg_dir, 'yt-dlp', 'config.txt'),
    ]
    if appdata:
        user_paths += [
            Path(appdata, 'yt-dlp.conf'),
            Path(appdata, 'yt-dlp', 'config'),
            Path(appdata, 'yt-dlp', 'config.txt'),
        ]
    user_paths += [
        Path(home, 'yt-dlp.conf'),
        Path(home, 'yt-dlp.conf.txt'),
        Path(home, '.yt-dlp', 'config'),
        Path(home, '.yt-dlp', 'config.txt'),
    ]

    return {
        'Portable': [Path(get_executable_path(), 'yt-dlp.conf')],
        'Home': [Path('yt-dlp.conf')],
        'User': user_paths,
        'System': [
            Path('/etc/yt-dlp.conf'),
            Path('/etc/yt-dlp/config'),
            Path('/etc/yt-dlp/config.txt'),
        ],
    }
class TestConfig(unittest.TestCase):
    """Checks which config files `parseOpts` reads, in what order, and which one wins."""

    maxDiff = None

    @set_environ()
    def test_config__ENVIRON_DEFAULTS_sanity(self):
        """The default test environment must not produce duplicate locations."""
        locations = make_expected()
        self.assertCountEqual(
            set(locations), locations,
            'ENVIRON_DEFAULTS produces non unique names')

    def test_config_all_environ_values(self):
        """Repeat the grouping check with each variable unset, empty, '.' and set."""
        for name, default_value in ENVIRON_DEFAULTS.items():
            candidates = (None, '', '.', default_value or '/some/dir')
            for candidate in candidates:
                with set_environ(**{name: candidate}):
                    self._simple_grouping_test()

    def test_config_default_expected_locations(self):
        """With no config file found, every expected location is probed."""
        probed, _ = self._simple_config_test()
        self.assertEqual(
            probed, make_expected(),
            'Not all expected locations have been checked')

    def test_config_default_grouping(self):
        """Run the grouping check against the unmodified environment."""
        self._simple_grouping_test()

    def _simple_grouping_test(self):
        """For every expected path, pretend only that file exists and verify
        both the locations that were probed and that its option was used."""
        groups = make_expected_groups()
        for group_name, group_paths in groups.items():
            for position, present_path in enumerate(group_paths):
                probed, opts = self._simple_config_test(present_path)
                wanted = expected_from_expected_groups(groups, present_path)
                self.assertEqual(
                    probed, wanted,
                    f'The checked locations do not match the expected ({group_name}, {position})')
                self.assertEqual(
                    opts.outtmpl['default'], '1',
                    f'The used result value was incorrect ({group_name}, {position})')

    def _simple_config_test(self, *stop_paths):
        """Parse options with a stubbed `read_file`.

        The stub records every path it is asked for; for a path listed in
        `stop_paths` it returns an `-o` option whose value is the running
        count of such hits. Returns `(probed_paths, opts)`.
        """
        hits = 0
        probed = []

        def read_file(filename, default=[]):
            nonlocal hits
            candidate = Path(filename)
            probed.append(candidate)
            if candidate in stop_paths:
                hits += 1
                return ['-o', f'{hits}']

        with ConfigMock(read_file):
            _, opts, _ = parseOpts([], False)

        return probed, opts

    @set_environ()
    def test_config_early_exit_commandline(self):
        """`--ignore-config` on the command line means no read is allowed."""
        self._early_exit_test(0, '--ignore-config')

    @set_environ()
    def test_config_early_exit_files(self):
        """`--ignore-config` returned by the N-th read must stop further reads."""
        for allowed in range(1, len(make_expected()) + 1):
            self._early_exit_test(allowed)

    def _early_exit_test(self, allowed_reads, *args):
        """Fail if more than `allowed_reads` config reads are attempted.

        The stubbed `read_file` returns `--ignore-config` on read number
        `allowed_reads` and fails the test on any later read.
        """
        read_count = 0

        def read_file(filename, default=[]):
            nonlocal read_count
            read_count += 1
            if read_count == allowed_reads:
                return ['--ignore-config']
            if read_count > allowed_reads:
                self.fail('The remaining config was not ignored')

        with ConfigMock(read_file):
            parseOpts(args, False)

    @set_environ()
    def test_config_override_commandline(self):
        """An `-o` given on the command line must win over every config read."""
        self._override_test(0, '-o', 'pass')

    @set_environ()
    def test_config_override_files(self):
        """The `-o` from the N-th read must win over those from later reads."""
        for first_hit in range(1, len(make_expected()) + 1):
            self._override_test(first_hit)

    def _override_test(self, start_index, *args):
        """Check that the value provided first takes precedence.

        The stubbed `read_file` returns `-o pass` on read number
        `start_index` and `-o fail` on every later read; earlier reads
        return nothing.
        """
        read_count = 0

        def read_file(filename, default=[]):
            nonlocal read_count
            read_count += 1
            if read_count == start_index:
                return ['-o', 'pass']
            if read_count > start_index:
                return ['-o', 'fail']

        with ConfigMock(read_file):
            _, opts, _ = parseOpts(args, False)

        self.assertEqual(
            opts.outtmpl['default'], 'pass',
            'The earlier group did not override the later ones')
@contextlib.contextmanager
def ConfigMock(read_file=None):
    """Patch `yt_dlp.options.Config` with a mock for the duration of the block.

    Calling the mock returns a real `Config` built from a fresh parser.
    When `read_file` is given, it is installed as the mock's `read_file`
    attribute. Yields the mock.
    """
    patcher = unittest.mock.patch('yt_dlp.options.Config')
    with patcher as config_mock:
        config_mock.return_value = Config(create_parser())
        if read_file is not None:
            config_mock.read_file = read_file

        yield config_mock
def make_expected(*filepaths):
    """Return the flat list of expected locations for the current environment,
    with each group cut off after the first path found in `filepaths`."""
    groups = _generate_expected_groups()
    return expected_from_expected_groups(groups, *filepaths)
def make_expected_groups(*filepaths):
    """Return the expected locations per group for the current environment,
    with each group cut off after the first path found in `filepaths`."""
    groups = _generate_expected_groups()
    return _filter_expected_groups(groups, filepaths)
def expected_from_expected_groups(expected_groups, *filepaths):
    """Filter `expected_groups` by `filepaths` and flatten the remaining
    paths into a single list, keeping group order."""
    filtered = _filter_expected_groups(expected_groups, filepaths)
    flattened = []
    for group_paths in filtered.values():
        flattened.extend(group_paths)
    return flattened
def _filter_expected_groups(expected, filepaths):
if not filepaths:
return expected
result = {}
for group, paths in expected.items():
new_paths = []
for path in paths:
new_paths.append(path)
if path in filepaths:
break
result[group] = new_paths
return result
if __name__ == '__main__':
unittest.main()

73
test/test_plugins.py Normal file
View file

@ -0,0 +1,73 @@
import importlib
import os
import shutil
import sys
import unittest
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata')
sys.path.append(str(TEST_DATA_DIR))
importlib.invalidate_caches()
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins
class TestPlugins(unittest.TestCase):
    """Exercises `yt_dlp.plugins` against the plugin packages in the test data directory."""

    TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME

    def test_directories_containing_plugins(self):
        """The test data plugin package must be among the reported directories."""
        reported = map(Path, directories())
        self.assertIn(self.TEST_PLUGIN_DIR, reported)

    def test_extractor_classes(self):
        """Only public extractor classes from public modules are loaded."""
        # Drop already-imported plugin extractor modules so that the
        # sys.modules assertions below reflect this load_plugins() call
        extractor_prefix = f'{PACKAGE_NAME}.extractor'
        stale = [name for name in tuple(sys.modules) if name.startswith(extractor_prefix)]
        for name in stale:
            del sys.modules[name]
        plugins_ie = load_plugins('extractor', 'IE')

        self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
        self.assertIn('NormalPluginIE', plugins_ie.keys())

        # don't load modules with underscore prefix
        ignored_module = f'{PACKAGE_NAME}.extractor._ignore'
        self.assertFalse(
            ignored_module in sys.modules.keys(),
            'loaded module beginning with underscore')
        self.assertNotIn('IgnorePluginIE', plugins_ie.keys())

        # Don't load extractors with underscore prefix
        self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys())

        # Don't load extractors not specified in __all__ (if supplied)
        self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys())
        self.assertIn('InAllPluginIE', plugins_ie.keys())

    def test_postprocessor_classes(self):
        """Postprocessor plugin classes are loaded by name."""
        plugins_pp = load_plugins('postprocessor', 'PP')
        self.assertIn('NormalPluginPP', plugins_pp.keys())

    def test_importing_zipped_module(self):
        """Plugins are also picked up from a zip archive placed on sys.path."""
        zip_path = TEST_DATA_DIR / 'zipped_plugins.zip'
        # Path of the archive without its '.zip' suffix: it is both the
        # directory that gets zipped and the base name make_archive() expects
        archive_base = str(zip_path)[:-4]
        shutil.make_archive(archive_base, 'zip', archive_base)
        sys.path.append(str(zip_path))  # add zip to search paths
        importlib.invalidate_caches()  # reset the import caches

        try:
            for plugin_type in ('extractor', 'postprocessor'):
                package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}')
                package_paths = map(Path, package.__path__)
                self.assertIn(zip_path / PACKAGE_NAME / plugin_type, package_paths)

            plugins_ie = load_plugins('extractor', 'IE')
            self.assertIn('ZippedPluginIE', plugins_ie.keys())

            plugins_pp = load_plugins('postprocessor', 'PP')
            self.assertIn('ZippedPluginPP', plugins_pp.keys())

        finally:
            sys.path.remove(str(zip_path))
            os.remove(zip_path)
            importlib.invalidate_caches()  # reset the import caches
if __name__ == '__main__':
unittest.main()

View file

@ -954,6 +954,85 @@ def test_escape_url(self):
) )
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
def test_js_to_json_vars_strings(self):
    """Variables passed to `js_to_json` are substituted as raw JSON values.

    Each case is `(js_code, variables, expected)`: a variable whose value
    is a quoted string must stay a string, while an unquoted literal,
    number, object or array must be decoded as such. A value that is none
    of these (`var`) is expected to come out as a string.
    """
    cases = (
        (
            '''{
'null': a,
'nullStr': b,
'true': c,
'trueStr': d,
'false': e,
'falseStr': f,
'unresolvedVar': g,
}''',
            {
                'a': 'null',
                'b': '"null"',
                'c': 'true',
                'd': '"true"',
                'e': 'false',
                'f': '"false"',
                'g': 'var',
            },
            {
                'null': None,
                'nullStr': 'null',
                'true': True,
                'trueStr': 'true',
                'false': False,
                'falseStr': 'false',
                'unresolvedVar': 'var'
            },
        ),
        (
            '''{
'int': a,
'intStr': b,
'float': c,
'floatStr': d,
}''',
            {
                'a': '123',
                'b': '"123"',
                'c': '1.23',
                'd': '"1.23"',
            },
            {
                'int': 123,
                'intStr': '123',
                'float': 1.23,
                'floatStr': '1.23',
            },
        ),
        (
            '''{
'object': a,
'objectStr': b,
'array': c,
'arrayStr': d,
}''',
            {
                'a': '{}',
                'b': '"{}"',
                'c': '[]',
                'd': '"[]"',
            },
            {
                'object': {},
                'objectStr': '{}',
                'array': [],
                'arrayStr': '[]',
            },
        ),
    )

    for js_code, variables, expected in cases:
        converted = js_to_json(js_code, variables)
        self.assertDictEqual(json.loads(converted), expected)
def test_js_to_json_realworld(self): def test_js_to_json_realworld(self):
inp = '''{ inp = '''{
'clip':{'provider':'pseudo'} 'clip':{'provider':'pseudo'}
@ -1874,6 +1953,8 @@ def test_get_compatible_ext(self):
vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv')
self.assertEqual(get_compatible_ext( self.assertEqual(get_compatible_ext(
vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm')
self.assertEqual(get_compatible_ext(
vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm')
self.assertEqual(get_compatible_ext( self.assertEqual(get_compatible_ext(
vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4')

View file

@ -0,0 +1,5 @@
from yt_dlp.extractor.common import InfoExtractor
# Plugin test fixture: test_plugins.py asserts that load_plugins() does NOT
# return this extractor.
class IgnorePluginIE(InfoExtractor):
    pass

View file

@ -0,0 +1,12 @@
from yt_dlp.extractor.common import InfoExtractor
# Plugin test fixture: not listed in this module's __all__, so test_plugins.py
# asserts that load_plugins() does NOT return it.
class IgnoreNotInAllPluginIE(InfoExtractor):
    pass
# Plugin test fixture: listed in this module's __all__, so test_plugins.py
# asserts that load_plugins() returns it.
class InAllPluginIE(InfoExtractor):
    pass
__all__ = ['InAllPluginIE']

View file

@ -0,0 +1,9 @@
from yt_dlp.extractor.common import InfoExtractor
# Plugin test fixture: a public extractor that test_plugins.py expects
# load_plugins() to return.
class NormalPluginIE(InfoExtractor):
    pass
# Plugin test fixture: the leading underscore makes the class private, so
# test_plugins.py asserts that load_plugins() does NOT return it.
class _IgnoreUnderscorePluginIE(InfoExtractor):
    pass

View file

@ -0,0 +1,5 @@
from yt_dlp.postprocessor.common import PostProcessor
# Plugin test fixture: a public postprocessor that test_plugins.py expects
# load_plugins() to return.
class NormalPluginPP(PostProcessor):
    pass

View file

@ -0,0 +1,5 @@
from yt_dlp.extractor.common import InfoExtractor
# Plugin test fixture: test_plugins.py expects load_plugins() to return this
# extractor once the zipped plugin archive has been added to sys.path.
class ZippedPluginIE(InfoExtractor):
    pass

View file

@ -0,0 +1,5 @@
from yt_dlp.postprocessor.common import PostProcessor
# Plugin test fixture: test_plugins.py expects load_plugins() to return this
# postprocessor once the zipped plugin archive has been added to sys.path.
class ZippedPluginPP(PostProcessor):
    pass

View file

@ -32,7 +32,8 @@
from .extractor.common import UnsupportedURLIE from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import ( from .postprocessor import (
EmbedThumbnailPP, EmbedThumbnailPP,
FFmpegFixupDuplicateMoovPP, FFmpegFixupDuplicateMoovPP,
@ -317,6 +318,7 @@ class YoutubeDL:
If not provided and the key is encrypted, yt-dlp will ask interactively If not provided and the key is encrypted, yt-dlp will ask interactively
prefer_insecure: Use HTTP instead of HTTPS to retrieve information. prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
(Only supported by some extractors) (Only supported by some extractors)
enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
http_headers: A dictionary of custom headers to be used for all requests http_headers: A dictionary of custom headers to be used for all requests
proxy: URL of the proxy server to use proxy: URL of the proxy server to use
geo_verification_proxy: URL of the proxy to use for IP address verification geo_verification_proxy: URL of the proxy to use for IP address verification
@ -584,7 +586,6 @@ def __init__(self, params=None, auto_init=True):
self._playlist_urls = set() self._playlist_urls = set()
self.cache = Cache(self) self.cache = Cache(self)
windows_enable_vt_mode()
stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
self._out_files = Namespace( self._out_files = Namespace(
out=stdout, out=stdout,
@ -593,6 +594,12 @@ def __init__(self, params=None, auto_init=True):
console=None if compat_os_name == 'nt' else next( console=None if compat_os_name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
) )
try:
windows_enable_vt_mode()
except Exception as e:
self.write_debug(f'Failed to enable VT mode: {e}')
self._allow_colors = Namespace(**{ self._allow_colors = Namespace(**{
type_: not self.params.get('no_color') and supports_terminal_sequences(stream) type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
for type_, stream in self._out_files.items_ if type_ != 'console' for type_, stream in self._out_files.items_ if type_ != 'console'
@ -1068,7 +1075,7 @@ def _outtmpl_expandpath(outtmpl):
# correspondingly that is not what we want since we need to keep # correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around # '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack. # with boundary-alike separator hack.
sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) sep = ''.join(random.choices(ascii_letters, k=32))
outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
# outtmpl should be expand_path'ed before template dict substitution # outtmpl should be expand_path'ed before template dict substitution
@ -1862,11 +1869,10 @@ def __process_playlist(self, ie_result, download):
self.to_screen('[download] Downloading item %s of %s' % ( self.to_screen('[download] Downloading item %s of %s' % (
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
extra.update({ entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
'playlist_index': playlist_index, 'playlist_index': playlist_index,
'playlist_autonumber': i + 1, 'playlist_autonumber': i + 1,
}) }, extra))
entry_result = self.__process_iterable_entry(entry, download, extra)
if not entry_result: if not entry_result:
failures += 1 failures += 1
if failures >= max_failures: if failures >= max_failures:
@ -2977,6 +2983,16 @@ def process_info(self, info_dict):
# Does nothing under normal operation - for backward compatibility of process_info # Does nothing under normal operation - for backward compatibility of process_info
self.post_extract(info_dict) self.post_extract(info_dict)
def replace_info_dict(new_info):
nonlocal info_dict
if new_info == info_dict:
return
info_dict.clear()
info_dict.update(new_info)
new_info, _ = self.pre_process(info_dict, 'video')
replace_info_dict(new_info)
self._num_downloads += 1 self._num_downloads += 1
# info_dict['_filename'] needs to be set for backward compatibility # info_dict['_filename'] needs to be set for backward compatibility
@ -3090,13 +3106,6 @@ def _write_link_file(link_type):
for link_type, should_write in write_links.items()): for link_type, should_write in write_links.items()):
return return
def replace_info_dict(new_info):
nonlocal info_dict
if new_info == info_dict:
return
info_dict.clear()
info_dict.update(new_info)
new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
replace_info_dict(new_info) replace_info_dict(new_info)
@ -3388,6 +3397,7 @@ def sanitize_info(info_dict, remove_private_keys=False):
reject = lambda k, v: v is None or k.startswith('__') or k in { reject = lambda k, v: v is None or k.startswith('__') or k in {
'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
'_format_sort_fields',
} }
else: else:
reject = lambda k, v: False reject = lambda k, v: False
@ -3457,6 +3467,7 @@ def run_pp(self, pp, infodict):
return infodict return infodict
def run_all_pps(self, key, info, *, additional_pps=None): def run_all_pps(self, key, info, *, additional_pps=None):
if key != 'video':
self._forceprint(key, info) self._forceprint(key, info)
for pp in (additional_pps or []) + self._pps[key]: for pp in (additional_pps or []) + self._pps[key]:
info = self.run_pp(pp, info) info = self.run_pp(pp, info)
@ -3726,7 +3737,10 @@ def print_debug_header(self):
# These imports can be slow. So import them only as needed # These imports can be slow. So import them only as needed
from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors from .extractor.extractors import (
_PLUGIN_CLASSES as plugin_ies,
_PLUGIN_OVERRIDES as plugin_ie_overrides
)
def get_encoding(stream): def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
@ -3771,10 +3785,6 @@ def get_encoding(stream):
write_debug('Lazy loading extractors is forcibly disabled') write_debug('Lazy loading extractors is forcibly disabled')
else: else:
write_debug('Lazy loading extractors is disabled') write_debug('Lazy loading extractors is disabled')
if plugin_extractors or plugin_postprocessors:
write_debug('Plugins: %s' % [
'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
if self.params['compat_opts']: if self.params['compat_opts']:
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
@ -3808,6 +3818,21 @@ def get_encoding(stream):
proxy_map.update(handler.proxies) proxy_map.update(handler.proxies)
write_debug(f'Proxy map: {proxy_map}') write_debug(f'Proxy map: {proxy_map}')
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % (
klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in plugins.items()]
if plugin_type == 'Extractor':
display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
for parent, plugins in plugin_ie_overrides.items())
if not display_list:
continue
write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
plugin_dirs = plugin_directories()
if plugin_dirs:
write_debug(f'Plugin directories: {plugin_dirs}')
# Not implemented # Not implemented
if False and self.params.get('call_home'): if False and self.params.get('call_home'):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
@ -3857,8 +3882,11 @@ def _setup_opener(self):
# https://github.com/ytdl-org/youtube-dl/issues/8227) # https://github.com/ytdl-org/youtube-dl/issues/8227)
file_handler = urllib.request.FileHandler() file_handler = urllib.request.FileHandler()
if not self.params.get('enable_file_urls'):
def file_open(*args, **kwargs): def file_open(*args, **kwargs):
raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons') raise urllib.error.URLError(
'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
'Use --enable-file-urls to enable at your own risk.')
file_handler.file_open = file_open file_handler.file_open = file_open
opener = urllib.request.build_opener( opener = urllib.request.build_opener(
@ -3921,7 +3949,7 @@ def _write_description(self, label, ie_result, descfn):
elif not self.params.get('overwrites', True) and os.path.exists(descfn): elif not self.params.get('overwrites', True) and os.path.exists(descfn):
self.to_screen(f'[info] {label.title()} description is already present') self.to_screen(f'[info] {label.title()} description is already present')
elif ie_result.get('description') is None: elif ie_result.get('description') is None:
self.report_warning(f'There\'s no {label} description to write') self.to_screen(f'[info] There\'s no {label} description to write')
return False return False
else: else:
try: try:
@ -3937,15 +3965,18 @@ def _write_subtitles(self, info_dict, filename):
''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
ret = [] ret = []
subtitles = info_dict.get('requested_subtitles') subtitles = info_dict.get('requested_subtitles')
if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
# subtitles download errors are already managed as troubles in relevant IE # subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE # that way it will silently go on when used with unsupporting IE
return ret return ret
elif not subtitles:
self.to_screen('[info] There\'s no subtitles for the requested languages')
return ret
sub_filename_base = self.prepare_filename(info_dict, 'subtitle') sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
if not sub_filename_base: if not sub_filename_base:
self.to_screen('[info] Skipping writing video subtitles') self.to_screen('[info] Skipping writing video subtitles')
return ret return ret
for sub_lang, sub_info in subtitles.items(): for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext'] sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
@ -3992,6 +4023,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
thumbnails, ret = [], [] thumbnails, ret = [], []
if write_all or self.params.get('writethumbnail', False): if write_all or self.params.get('writethumbnail', False):
thumbnails = info_dict.get('thumbnails') or [] thumbnails = info_dict.get('thumbnails') or []
if not thumbnails:
self.to_screen(f'[info] There\'s no {label} thumbnails to download')
return ret
multiple = write_all and len(thumbnails) > 1 multiple = write_all and len(thumbnails) > 1
if thumb_filename_base is None: if thumb_filename_base is None:

View file

@ -332,7 +332,7 @@ def parse_chapters(name, value):
mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_) mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf')) dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
if None in (dur or [None]): if None in (dur or [None]):
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end') raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
ranges.append(dur) ranges.append(dur)
continue continue
try: try:
@ -386,10 +386,12 @@ def metadataparser_actions(f):
raise ValueError(f'{cmd} is invalid; {err}') raise ValueError(f'{cmd} is invalid; {err}')
yield action yield action
parse_metadata = opts.parse_metadata or []
if opts.metafromtitle is not None: if opts.metafromtitle is not None:
parse_metadata.append('title:%s' % opts.metafromtitle) opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) opts.parse_metadata = {
k: list(itertools.chain(*map(metadataparser_actions, v)))
for k, v in opts.parse_metadata.items()
}
# Other options # Other options
if opts.playlist_items is not None: if opts.playlist_items is not None:
@ -561,11 +563,11 @@ def report_deprecation(val, old, new=None):
def get_postprocessors(opts): def get_postprocessors(opts):
yield from opts.add_postprocessors yield from opts.add_postprocessors
if opts.parse_metadata: for when, actions in opts.parse_metadata.items():
yield { yield {
'key': 'MetadataParser', 'key': 'MetadataParser',
'actions': opts.parse_metadata, 'actions': actions,
'when': 'pre_process' 'when': when
} }
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
if sponsorblock_query: if sponsorblock_query:
@ -701,7 +703,7 @@ def parse_options(argv=None):
postprocessors = list(get_postprocessors(opts)) postprocessors = list(get_postprocessors(opts))
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
any_getting = any(getattr(opts, k) for k in ( any_getting = any(getattr(opts, k) for k in (
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
@ -853,6 +855,7 @@ def parse_options(argv=None):
'legacyserverconnect': opts.legacy_server_connect, 'legacyserverconnect': opts.legacy_server_connect,
'nocheckcertificate': opts.no_check_certificate, 'nocheckcertificate': opts.no_check_certificate,
'prefer_insecure': opts.prefer_insecure, 'prefer_insecure': opts.prefer_insecure,
'enable_file_urls': opts.enable_file_urls,
'http_headers': opts.headers, 'http_headers': opts.headers,
'proxy': opts.proxy, 'proxy': opts.proxy,
'socket_timeout': opts.socket_timeout, 'socket_timeout': opts.socket_timeout,

View file

@ -5,6 +5,7 @@
import re import re
import shutil import shutil
import traceback import traceback
import urllib.parse
from .utils import expand_path, traverse_obj, version_tuple, write_json_file from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__ from .version import __version__
@ -22,11 +23,9 @@ def _get_root_dir(self):
return expand_path(res) return expand_path(res)
def _get_cache_fn(self, section, key, dtype): def _get_cache_fn(self, section, key, dtype):
assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \ assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
'invalid section %r' % section key = urllib.parse.quote(key, safe='').replace('%', ',') # encode non-ascii characters
assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')
return os.path.join(
self._get_root_dir(), section, f'{key}.{dtype}')
@property @property
def enabled(self): def enabled(self):

View file

@ -1,9 +1,11 @@
import enum import enum
import json
import os.path import os.path
import re import re
import subprocess import subprocess
import sys import sys
import time import time
import uuid
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import functools from ..compat import functools
@ -20,8 +22,10 @@
determine_ext, determine_ext,
encodeArgument, encodeArgument,
encodeFilename, encodeFilename,
find_available_port,
handle_youtubedl_headers, handle_youtubedl_headers,
remove_end, remove_end,
sanitized_Request,
traverse_obj, traverse_obj,
) )
@ -60,7 +64,6 @@ def real_download(self, filename, info_dict):
} }
if filename != '-': if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
status.update({ status.update({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@ -129,8 +132,7 @@ def _call_downloader(self, tmpfilename, info_dict):
self._debug_cmd(cmd) self._debug_cmd(cmd)
if 'fragments' not in info_dict: if 'fragments' not in info_dict:
_, stderr, returncode = Popen.run( _, stderr, returncode = self._call_process(cmd, info_dict)
cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
if returncode and stderr: if returncode and stderr:
self.to_stderr(stderr) self.to_stderr(stderr)
return returncode return returncode
@ -140,7 +142,7 @@ def _call_downloader(self, tmpfilename, info_dict):
retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
frag_index=None, fatal=not skip_unavailable_fragments) frag_index=None, fatal=not skip_unavailable_fragments)
for retry in retry_manager: for retry in retry_manager:
_, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) _, stderr, returncode = self._call_process(cmd, info_dict)
if not returncode: if not returncode:
break break
# TODO: Decide whether to retry based on error code # TODO: Decide whether to retry based on error code
@ -172,6 +174,9 @@ def _call_downloader(self, tmpfilename, info_dict):
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
return 0 return 0
def _call_process(self, cmd, info_dict):
    """Run the external downloader command and wait for it to finish.

    cmd:        the argument list to execute
    info_dict:  the info dict of the download; only inspected for the
                presence of 'fragments'

    Returns the result of ``Popen.run``, which callers unpack as
    ``(stdout, stderr, returncode)``.
    """
    # Fragmented downloads always captured stderr before this helper existed,
    # while plain downloads only did so when the downloader class sets
    # _CAPTURE_STDERR. Keep both behaviours instead of piping unconditionally,
    # so downloaders that opt out still write their stderr to the terminal.
    capture_stderr = 'fragments' in info_dict or self._CAPTURE_STDERR
    return Popen.run(cmd, text=True, stderr=subprocess.PIPE if capture_stderr else None)
class CurlFD(ExternalFD): class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V' AVAILABLE_OPT = '-V'
@ -256,6 +261,15 @@ def supports_manifest(manifest):
def _aria2c_filename(fn): def _aria2c_filename(fn):
return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
def _call_downloader(self, tmpfilename, info_dict):
    """Optionally attach RPC settings to info_dict, then defer to the parent implementation.

    When enabled, a port and a random secret are stored under
    info_dict['__rpc']; the rest of the class checks for that key.
    """
    # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931
    use_rpc = False and 'no-external-downloader-progress' not in self.params.get('compat_opts', [])
    if use_rpc:
        rpc_port = find_available_port() or 19190
        rpc_secret = str(uuid.uuid4())
        info_dict['__rpc'] = {
            'port': rpc_port,
            'secret': rpc_secret,
        }

    return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c', cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@ -276,6 +290,12 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args() cmd += self._configuration_args()
if '__rpc' in info_dict:
cmd += [
'--enable-rpc',
f'--rpc-listen-port={info_dict["__rpc"]["port"]}',
f'--rpc-secret={info_dict["__rpc"]["secret"]}']
# aria2c strips out spaces from the beginning/end of filenames and paths. # aria2c strips out spaces from the beginning/end of filenames and paths.
# We work around this issue by adding a "./" to the beginning of the # We work around this issue by adding a "./" to the beginning of the
# filename and relative path, and adding a "/" at the end of the path. # filename and relative path, and adding a "/" at the end of the path.
@ -304,6 +324,88 @@ def _make_cmd(self, tmpfilename, info_dict):
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd
def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
    """Send a single JSON-RPC 2.0 request to the RPC server on localhost.

    rpc_port:    port the RPC server listens on
    rpc_secret:  secret token, sent as the first positional RPC parameter
    method:      RPC method name, e.g. 'aria2.tellActive'
    params:      further positional parameters for the method

    Returns the 'result' member of the decoded response.
    Raises AssertionError if the response id does not match the request id.
    """
    # Does not actually need to be UUID, just unique
    request_id = str(uuid.uuid4())
    payload = {
        'jsonrpc': '2.0',
        'id': request_id,
        'method': method,
        'params': [f'token:{rpc_secret}', *params],
    }
    body = json.dumps(payload).encode('utf-8')
    headers = {
        'Content-Type': 'application/json',
        'Content-Length': f'{len(body)}',
        'Ytdl-request-proxy': '__noproxy__',
    }
    request = sanitized_Request(f'http://localhost:{rpc_port}/jsonrpc', data=body, headers=headers)

    with self.ydl.urlopen(request) as response:
        reply = json.load(response)

    assert reply.get('id') == request_id, 'Something went wrong with RPC server'
    return reply['result']
def _call_process(self, cmd, info_dict):
    """Run aria2c and, when RPC is enabled, report progress while it runs.

    Without an '__rpc' entry in info_dict this defers to the parent
    implementation. Otherwise the process is started, polled through
    `aria2c_rpc` until it exits, and a status dict is passed to
    `_hook_progress` on every poll.

    Returns a tuple ``('', stderr_text, returncode)``.
    """
    if '__rpc' not in info_dict:
        return super()._call_process(cmd, info_dict)

    send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret'])
    started = time.time()

    fragmented = 'fragments' in info_dict
    frag_count = len(info_dict['fragments']) if fragmented else 1
    status = {
        'filename': info_dict.get('_filename'),
        'status': 'downloading',
        'elapsed': 0,
        'downloaded_bytes': 0,
        'fragment_count': frag_count if fragmented else None,
        'fragment_index': 0 if fragmented else None,
    }
    self._hook_progress(status, info_dict)

    def get_stat(key, *obj, average=False):
        # Collect `key` from the entries of each RPC result list as floats,
        # dropping zero values, then sum them (or average them if requested)
        val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0]
        return sum(val) / (len(val) if average else 1)

    with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p:
        # Add a small sleep so that RPC client can receive response,
        # or the connection stalls infinitely
        time.sleep(0.2)
        retval = p.poll()
        while retval is None:
            # We don't use tellStatus as we won't know the GID without reading stdout
            # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive
            active = send_rpc('aria2.tellActive')
            completed = send_rpc('aria2.tellStopped', [0, frag_count])

            downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active)
            speed = get_stat('downloadSpeed', active)
            total = frag_count * get_stat('totalLength', active, completed, average=True)
            if total < downloaded:
                # The estimate is already exceeded, so treat the total as unknown
                total = None

            status.update({
                'downloaded_bytes': int(downloaded),
                'speed': speed,
                'total_bytes': None if fragmented else total,
                'total_bytes_estimate': total,
                # An unknown total means an unknown ETA; subtracting from None would raise TypeError
                'eta': None if total is None else (total - downloaded) / (speed or 1),
                'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
                'elapsed': time.time() - started
            })
            self._hook_progress(status, info_dict)

            if not active and len(completed) >= frag_count:
                send_rpc('aria2.shutdown')
                retval = p.wait()
                break

            time.sleep(0.1)
            retval = p.poll()

        # Read stderr while still inside the `with` block, before the pipe is closed on exit
        return '', p.stderr.read(), retval
class HttpieFD(ExternalFD): class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version' AVAILABLE_OPT = '--version'

View file

@ -79,6 +79,7 @@
) )
from .airmozilla import AirMozillaIE from .airmozilla import AirMozillaIE
from .airtv import AirTVIE from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
from .aljazeera import AlJazeeraIE from .aljazeera import AlJazeeraIE
from .alphaporno import AlphaPornoIE from .alphaporno import AlphaPornoIE
from .amara import AmaraIE from .amara import AmaraIE
@ -87,7 +88,10 @@
AluraCourseIE AluraCourseIE
) )
from .amcnetworks import AMCNetworksIE from .amcnetworks import AMCNetworksIE
from .amazon import AmazonStoreIE from .amazon import (
AmazonStoreIE,
AmazonReviewsIE,
)
from .amazonminitv import ( from .amazonminitv import (
AmazonMiniTVIE, AmazonMiniTVIE,
AmazonMiniTVSeasonIE, AmazonMiniTVSeasonIE,
@ -184,6 +188,10 @@
from .beeg import BeegIE from .beeg import BeegIE
from .behindkink import BehindKinkIE from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE from .bellmedia import BellMediaIE
from .beatbump import (
BeatBumpVideoIE,
BeatBumpPlaylistIE,
)
from .beatport import BeatportIE from .beatport import BeatportIE
from .berufetv import BerufeTVIE from .berufetv import BerufeTVIE
from .bet import BetIE from .bet import BetIE
@ -467,6 +475,8 @@
from .drtv import ( from .drtv import (
DRTVIE, DRTVIE,
DRTVLiveIE, DRTVLiveIE,
DRTVSeasonIE,
DRTVSeriesIE,
) )
from .dtube import DTubeIE from .dtube import DTubeIE
from .dvtv import DVTVIE from .dvtv import DVTVIE
@ -827,6 +837,7 @@
from .kakao import KakaoIE from .kakao import KakaoIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .kanal2 import Kanal2IE from .kanal2 import Kanal2IE
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE from .keezmovies import KeezMoviesIE
@ -836,6 +847,10 @@
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
) )
from .kick import (
KickIE,
KickVODIE,
)
from .kicker import KickerIE from .kicker import KickerIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
@ -1155,6 +1170,7 @@
from .netverse import ( from .netverse import (
NetverseIE, NetverseIE,
NetversePlaylistIE, NetversePlaylistIE,
NetverseSearchIE,
) )
from .newgrounds import ( from .newgrounds import (
NewgroundsIE, NewgroundsIE,
@ -1405,6 +1421,8 @@
from .polsatgo import PolsatGoIE from .polsatgo import PolsatGoIE
from .polskieradio import ( from .polskieradio import (
PolskieRadioIE, PolskieRadioIE,
PolskieRadioLegacyIE,
PolskieRadioAuditionIE,
PolskieRadioCategoryIE, PolskieRadioCategoryIE,
PolskieRadioPlayerIE, PolskieRadioPlayerIE,
PolskieRadioPodcastIE, PolskieRadioPodcastIE,
@ -1537,7 +1555,10 @@
) )
from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .rozhlas import RozhlasIE from .rozhlas import (
RozhlasIE,
RozhlasVltavaIE,
)
from .rte import RteIE, RteRadioIE from .rte import RteIE, RteRadioIE
from .rtlnl import ( from .rtlnl import (
RtlNlIE, RtlNlIE,
@ -1695,6 +1716,7 @@
SoundcloudSetIE, SoundcloudSetIE,
SoundcloudRelatedIE, SoundcloudRelatedIE,
SoundcloudUserIE, SoundcloudUserIE,
SoundcloudUserPermalinkIE,
SoundcloudTrackStationIE, SoundcloudTrackStationIE,
SoundcloudPlaylistIE, SoundcloudPlaylistIE,
SoundcloudSearchIE, SoundcloudSearchIE,
@ -1856,6 +1878,11 @@
from .thisamericanlife import ThisAmericanLifeIE from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE from .thisoldhouse import ThisOldHouseIE
from .thisvid import (
ThisVidIE,
ThisVidMemberIE,
ThisVidPlaylistIE,
)
from .threespeak import ( from .threespeak import (
ThreeSpeakIE, ThreeSpeakIE,
ThreeSpeakUserIE, ThreeSpeakUserIE,
@ -1868,6 +1895,7 @@
TikTokEffectIE, TikTokEffectIE,
TikTokTagIE, TikTokTagIE,
TikTokVMIE, TikTokVMIE,
TikTokLiveIE,
DouyinIE, DouyinIE,
) )
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
@ -1905,6 +1933,7 @@
TrovoChannelVodIE, TrovoChannelVodIE,
TrovoChannelClipIE, TrovoChannelClipIE,
) )
from .trtcocuk import TrtCocukVideoIE
from .trueid import TrueIDIE from .trueid import TrueIDIE
from .trunews import TruNewsIE from .trunews import TruNewsIE
from .truth import TruthIE from .truth import TruthIE
@ -2088,6 +2117,13 @@
) )
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
from .videoken import (
VideoKenIE,
VideoKenPlayerIE,
VideoKenPlaylistIE,
VideoKenCategoryIE,
VideoKenTopicIE,
)
from .videomore import ( from .videomore import (
VideomoreIE, VideomoreIE,
VideomoreVideoIE, VideomoreVideoIE,
@ -2155,6 +2191,7 @@
VoicyIE, VoicyIE,
VoicyChannelIE, VoicyChannelIE,
) )
from .volejtv import VolejTVIE
from .voot import ( from .voot import (
VootIE, VootIE,
VootSeriesIE, VootSeriesIE,
@ -2237,6 +2274,7 @@
WSJArticleIE, WSJArticleIE,
) )
from .wwe import WWEIE from .wwe import WWEIE
from .xanimu import XanimuIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE from .xfileshare import XFileShareIE

View file

@ -168,7 +168,7 @@ def _real_extract(self, url):
}, data=b'')['token'] }, data=b'')['token']
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) self._K = ''.join(random.choices('0123456789abcdef', k=16))
message = bytes_to_intlist(json.dumps({ message = bytes_to_intlist(json.dumps({
'k': self._K, 'k': self._K,
't': token, 't': token,

View file

@ -0,0 +1,60 @@
from .common import InfoExtractor
from ..utils import int_or_none, merge_dicts
class AitubeKZVideoIE(InfoExtractor):
    # Matches aitube.kz /video and /embed/ URLs; the `id` query parameter
    # may be preceded by other query parameters
    _VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^\?]+)?id=(?P<id>[\w-]+)'
    _TESTS = [{
        # id parameter as the first parameter
        'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1',
        'info_dict': {
            'id': '9291d29b-c038-49a1-ad42-3da2051d353c',
            'ext': 'mp4',
            'duration': 2174.0,
            'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe',
            'like_count': int,
            'channel': 'ASTANA TV',
            'comment_count': int,
            'view_count': int,
            'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими',
            'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b',
            'upload_date': '20221102',
            'timestamp': 1667370519,
            'title': 'Ангел хранитель 1 серия',
            'channel_follower_count': int,
        }
    }, {
        # embed url
        'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
        'only_matching': True,
    }, {
        # id parameter is not the first parameter
        'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Metadata comes from the Next.js page data, with JSON-LD merged in last
        video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['videoInfo']
        json_ld = self._search_json_ld(webpage, video_id)

        manifest_url = f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video'
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest_url, video_id)

        # Fall back to the page's meta tags when the page data lacks a value
        title = video_info.get('title') or self._html_search_meta(['name', 'og:title'], webpage)
        view_count = (
            video_info.get('viewCount')
            or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage)))

        info = {
            'id': video_id,
            'title': title,
            'description': video_info.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'view_count': view_count,
            'like_count': video_info.get('likeCount'),
            'channel': video_info.get('channelTitle'),
            'channel_id': video_info.get('channelId'),
            'thumbnail': video_info.get('coverUrl'),
            'comment_count': video_info.get('commentCount'),
            'channel_follower_count': int_or_none(video_info.get('channelSubscriberCount')),
        }
        return merge_dicts(info, json_ld)

View file

@ -1,5 +1,17 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none from ..utils import (
ExtractorError,
clean_html,
float_or_none,
get_element_by_attribute,
get_element_by_class,
int_or_none,
js_to_json,
traverse_obj,
url_or_none,
)
class AmazonStoreIE(InfoExtractor): class AmazonStoreIE(InfoExtractor):
@ -9,7 +21,7 @@ class AmazonStoreIE(InfoExtractor):
'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
'info_dict': { 'info_dict': {
'id': 'B098XNCHLD', 'id': 'B098XNCHLD',
'title': 'md5:dae240564cbb2642170c02f7f0d7e472', 'title': str,
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
'playlist': [{ 'playlist': [{
@ -20,28 +32,32 @@ class AmazonStoreIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 34, 'duration': 34,
}, },
}] }],
'expected_warnings': ['Unable to extract data'],
}, { }, {
'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3', 'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3',
'info_dict': { 'info_dict': {
'id': 'B0863TXGM3', 'id': 'B0863TXGM3',
'title': 'md5:d1d3352428f8f015706c84b31e132169', 'title': str,
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
'expected_warnings': ['Unable to extract data'],
}, { }, {
'url': 'https://www.amazon.com/dp/B0845NXCXF/', 'url': 'https://www.amazon.com/dp/B0845NXCXF/',
'info_dict': { 'info_dict': {
'id': 'B0845NXCXF', 'id': 'B0845NXCXF',
'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', 'title': str,
}, },
'playlist-mincount': 1, 'playlist-mincount': 1,
'expected_warnings': ['Unable to extract data'],
}, { }, {
'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ', 'url': 'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ',
'info_dict': { 'info_dict': {
'id': 'B08WX337PQ', 'id': 'B08WX337PQ',
'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', 'title': str,
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
'expected_warnings': ['Unable to extract data'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -52,7 +68,7 @@ def _real_extract(self, url):
try: try:
data_json = self._search_json( data_json = self._search_json(
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
transform_source=lambda x: x.replace(R'\\u', R'\u')) transform_source=js_to_json)
except ExtractorError as e: except ExtractorError as e:
retry.error = e retry.error = e
@ -66,3 +82,89 @@ def _real_extract(self, url):
'width': int_or_none(video.get('videoWidth')), 'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
class AmazonReviewsIE(InfoExtractor):
    """Extractor for videos attached to Amazon customer reviews (``/gp/customer-reviews/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P<id>[^/&#$?]+)'
    _TESTS = [{
        'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl',
        'info_dict': {
            'id': 'R10VE9VUSY19L3',
            'ext': 'mp4',
            'title': 'Get squad #Suspicious',
            'description': 'md5:7012695052f440a1e064e402d87e0afb',
            'uploader': 'Kimberly Cronkright',
            'average_rating': 1.0,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'expected_warnings': ['Review body was not found in webpage'],
    }, {
        'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US',
        'info_dict': {
            'id': 'R10VE9VUSY19L3',
            'ext': 'mp4',
            'title': 'Get squad #Suspicious',
            'description': 'md5:7012695052f440a1e064e402d87e0afb',
            'uploader': 'Kimberly Cronkright',
            'average_rating': 1.0,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'expected_warnings': ['Review body was not found in webpage'],
    }, {
        'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/',
        'info_dict': {
            'id': 'RV1CO8JN5VGXV',
            'ext': 'mp4',
            'title': 'Not sure about its durability',
            'description': 'md5:1a252c106357f0a3109ebf37d2e87494',
            'uploader': 'Shoaib Gulzar',
            'average_rating': 2.0,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'expected_warnings': ['Review body was not found in webpage'],
    }]

    def _real_extract(self, url):
        """Download the review page and build the info dict for its video.

        The page download is wrapped in the extractor's retry manager and is
        retried while the ``data-hook="review-body"`` element is missing.
        Formats are collected from an m3u8 manifest URL and/or a direct mp4
        URL found inside the review body; if neither yields a format,
        ``raise_no_formats`` is called.
        """
        video_id = self._match_id(url)

        for retry in self.RetryManager():
            webpage = self._download_webpage(url, video_id)
            review_body = get_element_by_attribute('data-hook', 'review-body', webpage)
            if not review_body:
                retry.error = ExtractorError('Review body was not found in webpage', expected=True)

        formats, subtitles = [], {}

        # Manifest referenced by the data-video-url attribute
        hls_url = self._search_regex(
            r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None)
        if url_or_none(hls_url):
            hls_formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', fatal=False)
            formats.extend(hls_formats)

        # Direct mp4 link stored in a hidden <input class="video-url">
        mp4_url = self._search_regex(
            r'<input[^>]+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None)
        if url_or_none(mp4_url):
            formats.append({
                'url': mp4_url,
                'ext': 'mp4',
                'format_id': 'http-mp4',
            })

        if not formats:
            self.raise_no_formats('No video found for this customer review', expected=True)

        title = (
            clean_html(get_element_by_attribute('data-hook', 'review-title', webpage))
            or self._html_extract_title(webpage))

        # The last matching <span> in the review body is used as the description
        review_spans = re.findall(
            r'<span(?:\s+class="cr-original-review-content")?>(.+?)</span>', review_body)
        description = clean_html(traverse_obj(review_spans, -1))

        uploader = clean_html(get_element_by_class('a-profile-name', webpage))

        # Only the text before the first space of the star-rating element is parsed as a number
        rating_text = clean_html(
            get_element_by_attribute('data-hook', 'review-star-rating', webpage) or '')
        average_rating = float_or_none(rating_text.partition(' ')[0])

        thumbnail = self._search_regex(
            r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'average_rating': average_rating,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }

View file

@ -65,6 +65,21 @@ class ArteTVIE(ArteTVBaseIE):
}, { }, {
'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE', 'url': 'https://api.arte.tv/api/player/v2/config/de/LIVE',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
'info_dict': {
'id': '110203-006-A',
'chapters': 'count:16',
'description': 'md5:cf592f1df52fe52007e3f8eac813c084',
'alt_title': 'Zaz',
'title': 'Baloise Session 2022',
'timestamp': 1668445200,
'duration': 4054,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/ubQjmVCGyRx3hmBuZEK9QZ/940x530',
'upload_date': '20221114',
'ext': 'mp4',
},
'expected_warnings': ['geo restricted']
}] }]
_GEO_BYPASS = True _GEO_BYPASS = True
@ -180,10 +195,6 @@ def _real_extract(self, url):
else: else:
self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}') self.report_warning(f'Skipping stream with unknown protocol {stream["protocol"]}')
# TODO: chapters from stream['segments']?
# The JS also looks for chapters in config['data']['attributes']['chapters'],
# but I am yet to find a video having those
formats.extend(secondary_formats) formats.extend(secondary_formats)
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
@ -205,6 +216,11 @@ def _real_extract(self, url):
{'url': image['url'], 'id': image.get('caption')} {'url': image['url'], 'id': image.get('caption')}
for image in metadata.get('images') or [] if url_or_none(image.get('url')) for image in metadata.get('images') or [] if url_or_none(image.get('url'))
], ],
# TODO: chapters may also be in stream['segments']?
'chapters': traverse_obj(config, ('data', 'attributes', 'chapters', 'elements', ..., {
'start_time': 'startTime',
'title': 'title',
})) or None,
} }

View file

@ -29,11 +29,18 @@ class BandcampIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1812978515', 'id': '1812978515',
'ext': 'mp3', 'ext': 'mp3',
'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭", 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
'duration': 9.8485, 'duration': 9.8485,
'uploader': 'youtube-dl "\'/\\ä↭', 'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129', 'upload_date': '20121129',
'timestamp': 1354224127, 'timestamp': 1354224127,
'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
'album_artist': 'youtube-dl "\'/\\ä↭',
'track_id': '1812978515',
'artist': 'youtube-dl "\'/\\ä↭',
'uploader_url': 'https://youtube-dl.bandcamp.com',
'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
}, },
'_skip': 'There is a limit of 200 free downloads / month for the test song' '_skip': 'There is a limit of 200 free downloads / month for the test song'
}, { }, {
@ -41,7 +48,8 @@ class BandcampIE(InfoExtractor):
'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'info_dict': { 'info_dict': {
'id': '2650410135', 'id': '2650410135',
'ext': 'aiff', 'ext': 'm4a',
'acodec': r're:[fa]lac',
'title': 'Ben Prunty - Lanius (Battle)', 'title': 'Ben Prunty - Lanius (Battle)',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Ben Prunty', 'uploader': 'Ben Prunty',
@ -54,7 +62,10 @@ class BandcampIE(InfoExtractor):
'track_number': 1, 'track_number': 1,
'track_id': '2650410135', 'track_id': '2650410135',
'artist': 'Ben Prunty', 'artist': 'Ben Prunty',
'album_artist': 'Ben Prunty',
'album': 'FTL: Advanced Edition Soundtrack', 'album': 'FTL: Advanced Edition Soundtrack',
'uploader_url': 'https://benprunty.bandcamp.com',
'uploader_id': 'benprunty',
}, },
}, { }, {
# no free download, mp3 128 # no free download, mp3 128
@ -75,7 +86,34 @@ class BandcampIE(InfoExtractor):
'track_number': 5, 'track_number': 5,
'track_id': '2584466013', 'track_id': '2584466013',
'artist': 'Mastodon', 'artist': 'Mastodon',
'album_artist': 'Mastodon',
'album': 'Call of the Mastodon', 'album': 'Call of the Mastodon',
'uploader_url': 'https://relapsealumni.bandcamp.com',
'uploader_id': 'relapsealumni',
},
}, {
# track from compilation album (artist/album_artist difference)
'url': 'https://diskotopia.bandcamp.com/track/safehouse',
'md5': '19c5337bca1428afa54129f86a2f6a69',
'info_dict': {
'id': '1978174799',
'ext': 'mp3',
'title': 'submerse - submerse - Safehouse',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'submerse',
'timestamp': 1480779297,
'upload_date': '20161203',
'release_timestamp': 1481068800,
'release_date': '20161207',
'duration': 154.066,
'track': 'submerse - Safehouse',
'track_number': 3,
'track_id': '1978174799',
'artist': 'submerse',
'album_artist': 'Diskotopia',
'album': 'DSK F/W 2016-2017 Free Compilation',
'uploader_url': 'https://diskotopia.bandcamp.com',
'uploader_id': 'diskotopia',
}, },
}] }]
@ -121,6 +159,9 @@ def _real_extract(self, url):
embed = self._extract_data_attr(webpage, title, 'embed', False) embed = self._extract_data_attr(webpage, title, 'embed', False)
current = tralbum.get('current') or {} current = tralbum.get('current') or {}
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist') artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
album_artist = self._html_search_regex(
r'<h3 class="albumTitle">[\S\s]*?by\s*<span>\s*<a href="[^>]+">\s*([^>]+?)\s*</a>',
webpage, 'album artist', fatal=False)
timestamp = unified_timestamp( timestamp = unified_timestamp(
current.get('publish_date') or tralbum.get('album_publish_date')) current.get('publish_date') or tralbum.get('album_publish_date'))
@ -205,6 +246,7 @@ def _real_extract(self, url):
'track_id': track_id, 'track_id': track_id,
'artist': artist, 'artist': artist,
'album': embed.get('album_title'), 'album': embed.get('album_title'),
'album_artist': album_artist,
'formats': formats, 'formats': formats,
} }

View file

@ -0,0 +1,101 @@
from .common import InfoExtractor
from .youtube import YoutubeIE, YoutubeTabIE
class BeatBumpVideoIE(InfoExtractor):
    """Extractor for beatbump.ml ``listen?id=`` pages; hands the id over to ``YoutubeIE``."""

    _VALID_URL = r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
        'md5': '5ff3fff41d3935b9810a9731e485fe66',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
            'artist': 'Stephen',
            'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
            'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
            'upload_date': '20190312',
            'categories': ['Music'],
            'playable_in_embed': True,
            'duration': 169,
            'like_count': int,
            'alt_title': 'Voyeur Girl',
            'view_count': int,
            'track': 'Voyeur Girl',
            'uploader': 'Stephen - Topic',
            'title': 'Voyeur Girl',
            'channel_follower_count': int,
            'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'age_limit': 0,
            'availability': 'public',
            'live_status': 'not_live',
            'album': 'it\'s too much love to know my dear',
            'channel': 'Stephen',
            'comment_count': int,
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'tags': 'count:11',
            'creator': 'Stephen',
            'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
        }
    }]

    def _real_extract(self, url):
        """Return a URL result pointing at the music.youtube.com watch page for the matched id."""
        video_id = self._match_id(url)
        watch_url = f'https://music.youtube.com/watch?v={video_id}'
        return self.url_result(watch_url, YoutubeIE, video_id)
class BeatBumpPlaylistIE(InfoExtractor):
    """Extractor for beatbump.ml release, artist and playlist pages; hands the id over to ``YoutubeTabIE``."""

    _VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
        'playlist_count': 50,
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'availability': 'unlisted',
            'view_count': int,
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'description': '',
            'tags': [],
            'modified_date': '20221223',
        }
    }, {
        'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'channel_follower_count': int,
            'title': 'NoCopyrightSounds - Videos',
            'uploader': 'NoCopyrightSounds',
            'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
            'channel': 'NoCopyrightSounds',
            'tags': 'count:12',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
        },
    }, {
        'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS : All Releases 💿',
            'uploader': 'NoCopyrightSounds',
            'availability': 'public',
            'channel': 'NoCopyrightSounds',
            'tags': [],
            'modified_date': '20221225',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
        }
    }]

    def _real_extract(self, url):
        """Return a URL result pointing at the music.youtube.com browse page for the matched id."""
        browse_id = self._match_id(url)
        browse_url = f'https://music.youtube.com/browse/{browse_id}'
        return self.url_result(browse_url, YoutubeTabIE, browse_id)

View file

@ -16,13 +16,16 @@
format_field, format_field,
int_or_none, int_or_none,
make_archive_id, make_archive_id,
merge_dicts,
mimetype2ext, mimetype2ext,
parse_count, parse_count,
parse_qs, parse_qs,
qualities, qualities,
smuggle_url,
srt_subtitles_timecode, srt_subtitles_timecode,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
unsmuggle_url,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -303,7 +306,8 @@ def _real_extract(self, url):
getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
if is_anthology: if is_anthology:
title += f' p{part_id:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}' part_id = part_id or 1
title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
aid = video_data.get('aid') aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
@ -880,16 +884,12 @@ def _get_formats(self, *, ep_id=None, aid=None):
return formats return formats
def _extract_video_info(self, video_data, *, ep_id=None, aid=None): def _parse_video_metadata(self, video_data):
return { return {
'id': ep_id or aid,
'title': video_data.get('title_display') or video_data.get('title'), 'title': video_data.get('title_display') or video_data.get('title'),
'thumbnail': video_data.get('cover'), 'thumbnail': video_data.get('cover'),
'episode_number': int_or_none(self._search_regex( 'episode_number': int_or_none(self._search_regex(
r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)), r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
'formats': self._get_formats(ep_id=ep_id, aid=aid),
'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid),
'extractor_key': BiliIntlIE.ie_key(),
} }
def _perform_login(self, username, password): def _perform_login(self, username, password):
@ -935,6 +935,10 @@ class BiliIntlIE(BiliIntlBaseIE):
'title': 'E2 - The First Night', 'title': 'E2 - The First Night',
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'episode_number': 2, 'episode_number': 2,
'upload_date': '20201009',
'episode': 'Episode 2',
'timestamp': 1602259500,
'description': 'md5:297b5a17155eb645e14a14b385ab547e',
} }
}, { }, {
# Non-Bstation page # Non-Bstation page
@ -945,6 +949,10 @@ class BiliIntlIE(BiliIntlBaseIE):
'title': 'E3 - Who?', 'title': 'E3 - Who?',
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'episode_number': 3, 'episode_number': 3,
'description': 'md5:e1a775e71a35c43f141484715470ad09',
'episode': 'Episode 3',
'upload_date': '20211219',
'timestamp': 1639928700,
} }
}, { }, {
# Subtitle with empty content # Subtitle with empty content
@ -957,6 +965,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'episode_number': 140, 'episode_number': 140,
}, },
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
}, {
'url': 'https://www.bilibili.tv/en/video/2041863208',
'info_dict': {
'id': '2041863208',
'ext': 'mp4',
'timestamp': 1670874843,
'description': 'Scheduled for April 2023.\nStudio: ufotable',
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
'upload_date': '20221212',
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
}
}, { }, {
'url': 'https://www.biliintl.com/en/play/34613/341736', 'url': 'https://www.biliintl.com/en/play/34613/341736',
'only_matching': True, 'only_matching': True,
@ -974,42 +993,78 @@ class BiliIntlIE(BiliIntlBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _make_url(video_id, series_id=None):
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') if series_id:
video_id = ep_id or aid return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
return f'https://www.bilibili.tv/en/video/{video_id}'
def _extract_video_metadata(self, url, video_id, season_id):
url, smuggled_data = unsmuggle_url(url, {})
if smuggled_data.get('title'):
return smuggled_data
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
# Bstation layout # Bstation layout
initial_data = ( initial_data = (
self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={}) self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None)) or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
video_data = traverse_obj( video_data = traverse_obj(
initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
if season_id and not video_data: if season_id and not video_data:
# Non-Bstation layout, read through episode list # Non-Bstation layout, read through episode list
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
video_data = traverse_obj(season_json, video_data = traverse_obj(season_json, (
('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
expected_type=dict, get_all=False) ), expected_type=dict, get_all=False)
return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid)
# XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
return merge_dicts(
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
'title': self._html_search_meta('og:title', webpage),
'description': self._html_search_meta('og:description', webpage)
})
def _real_extract(self, url):
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
video_id = ep_id or aid
return {
'id': video_id,
**self._extract_video_metadata(url, video_id, season_id),
'formats': self._get_formats(ep_id=ep_id, aid=aid),
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
}
class BiliIntlSeriesIE(BiliIntlBaseIE): class BiliIntlSeriesIE(BiliIntlBaseIE):
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P<id>\d+)/?(?:[?#]|$)' IE_NAME = 'biliIntl:series'
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bilibili.tv/en/play/34613', 'url': 'https://www.bilibili.tv/en/play/34613',
'playlist_mincount': 15, 'playlist_mincount': 15,
'info_dict': { 'info_dict': {
'id': '34613', 'id': '34613',
'title': 'Fly Me to the Moon', 'title': 'TONIKAWA: Over the Moon For You',
'description': 'md5:a861ee1c4dc0acfad85f557cc42ac627', 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
'categories': ['Romance', 'Comedy', 'Slice of life'], 'categories': ['Slice of life', 'Comedy', 'Romance'],
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'view_count': int, 'view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.bilibili.tv/en/media/1048837',
'info_dict': {
'id': '1048837',
'title': 'SPY×FAMILY',
'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
'categories': ['Adventure', 'Action', 'Comedy'],
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
'view_count': int,
},
'playlist_mincount': 25,
}, { }, {
'url': 'https://www.biliintl.com/en/play/34613', 'url': 'https://www.biliintl.com/en/play/34613',
'only_matching': True, 'only_matching': True,
@ -1020,9 +1075,12 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
def _entries(self, series_id): def _entries(self, series_id):
series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id) series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]): for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
episode_id = str(episode.get('episode_id')) episode_id = str(episode['episode_id'])
yield self._extract_video_info(episode, ep_id=episode_id) yield self.url_result(smuggle_url(
BiliIntlIE._make_url(episode_id, series_id),
self._parse_video_metadata(episode)
), BiliIntlIE, episode_id)
def _real_extract(self, url): def _real_extract(self, url):
series_id = self._match_id(url) series_id = self._match_id(url)
@ -1034,7 +1092,7 @@ def _real_extract(self, url):
class BiliLiveIE(InfoExtractor): class BiliLiveIE(InfoExtractor):
_VALID_URL = r'https?://live.bilibili.com/(blanc/)?(?P<id>\d+)' _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://live.bilibili.com/196', 'url': 'https://live.bilibili.com/196',
@ -1114,6 +1172,7 @@ def _real_extract(self, url):
'thumbnail': room_data.get('user_cover'), 'thumbnail': room_data.get('user_cover'),
'timestamp': stream_data.get('live_time'), 'timestamp': stream_data.get('live_time'),
'formats': formats, 'formats': formats,
'is_live': True,
'http_headers': { 'http_headers': {
'Referer': url, 'Referer': url,
}, },

View file

@ -4,6 +4,7 @@
import hashlib import hashlib
import hmac import hmac
import json import json
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -27,11 +28,10 @@ class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)' _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_NETRC_MACHINE = 'cdapl' _NETRC_MACHINE = 'cdapl'
_BASE_URL = 'http://www.cda.pl/' _BASE_URL = 'https://www.cda.pl'
_BASE_API_URL = 'https://api.cda.pl' _BASE_API_URL = 'https://api.cda.pl'
_API_HEADERS = { _API_HEADERS = {
'Accept': 'application/vnd.cda.public+json', 'Accept': 'application/vnd.cda.public+json',
'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)',
} }
# hardcoded in the app # hardcoded in the app
_LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q' _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
@ -101,6 +101,38 @@ def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
}, **kwargs) }, **kwargs)
def _perform_login(self, username, password): def _perform_login(self, username, password):
app_version = random.choice((
'1.2.88 build 15306',
'1.2.174 build 18469',
))
android_version = random.randrange(8, 14)
phone_model = random.choice((
# x-kom.pl top selling Android smartphones, as of 2022-12-26
# https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
'ASUS ZenFone 8',
'Motorola edge 20 5G',
'Motorola edge 30 neo 5G',
'Motorola moto g22',
'OnePlus Nord 2T 5G',
'Samsung Galaxy A32 SMA325F',
'Samsung Galaxy M13',
'Samsung Galaxy S20 FE 5G',
'Xiaomi 11T',
'Xiaomi POCO M4 Pro',
'Xiaomi Redmi 10',
'Xiaomi Redmi 10C',
'Xiaomi Redmi 9C NFC',
'Xiaomi Redmi Note 10 Pro',
'Xiaomi Redmi Note 11 Pro',
'Xiaomi Redmi Note 11',
'Xiaomi Redmi Note 11S 5G',
'Xiaomi Redmi Note 11S',
'realme 10',
'realme 9 Pro+',
'vivo Y33s',
))
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {} cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5: if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}' self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
@ -138,9 +170,6 @@ def _api_extract(self, video_id):
meta = self._download_json( meta = self._download_json(
f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video'] f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video']
if meta.get('premium') and not meta.get('premium_free'):
self.report_drm(video_id)
uploader = traverse_obj(meta, 'author', 'login') uploader = traverse_obj(meta, 'author', 'login')
formats = [{ formats = [{
@ -151,6 +180,10 @@ def _api_extract(self, video_id):
'filesize': quality.get('length'), 'filesize': quality.get('length'),
} for quality in meta['qualities'] if quality.get('file')] } for quality in meta['qualities'] if quality.get('file')]
if meta.get('premium') and not meta.get('premium_free') and not formats:
raise ExtractorError(
'Video requires CDA Premium - subscription needed', expected=True)
return { return {
'id': video_id, 'id': video_id,
'title': meta.get('title'), 'title': meta.get('title'),
@ -167,10 +200,10 @@ def _api_extract(self, video_id):
def _web_extract(self, video_id, url): def _web_extract(self, video_id, url):
self._set_cookie('cda.pl', 'cda.player', 'html5') self._set_cookie('cda.pl', 'cda.player', 'html5')
webpage = self._download_webpage( webpage = self._download_webpage(
self._BASE_URL + '/video/' + video_id, video_id) f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage: if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True) self.raise_login_required('This video is only available for premium users')
if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage): if re.search(r'niedostępn[ey] w(?:&nbsp;|\s+)Twoim kraju\s*<', webpage):
self.raise_geo_restricted() self.raise_geo_restricted()

View file

@ -1,5 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none, int_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -38,11 +39,30 @@ def _real_extract(self, url):
siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
video_id = mobj.group('id') video_id = mobj.group('id')
stream = self._download_json( password = self.get_param('videopassword')
headers = {'Accept': 'application/json'}
if password:
headers['accessPwd'] = password
stream, urlh = self._download_json_handle(
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
video_id, fatal=False, query={'siteurl': siteurl}) video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
if not stream:
self.raise_login_required(method='cookies') if urlh.status == 403:
if stream['code'] == 53004:
self.raise_login_required()
if stream['code'] == 53005:
if password:
raise ExtractorError('Wrong password', expected=True)
raise ExtractorError(
'This video is protected by a password, use the --video-password option', expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
if urlh.status == 429:
self.raise_login_required(
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
method='cookies')
video_id = stream.get('recordUUID') or video_id video_id = stream.get('recordUUID') or video_id
@ -78,7 +98,7 @@ def _real_extract(self, url):
'title': stream['recordName'], 'title': stream['recordName'],
'description': stream.get('description'), 'description': stream.get('description'),
'uploader': stream.get('ownerDisplayName'), 'uploader': stream.get('ownerDisplayName'),
'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'),
'timestamp': unified_timestamp(stream.get('createTime')), 'timestamp': unified_timestamp(stream.get('createTime')),
'duration': int_or_none(stream.get('duration'), 1000), 'duration': int_or_none(stream.get('duration'), 1000),
'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id),

View file

@ -32,6 +32,7 @@
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
HEADRequest,
LenientJSONDecoder, LenientJSONDecoder,
RegexNotFoundError, RegexNotFoundError,
RetryManager, RetryManager,
@ -80,6 +81,7 @@
update_Request, update_Request,
update_url_query, update_url_query,
url_basename, url_basename,
urlhandle_detect_ext,
url_or_none, url_or_none,
urljoin, urljoin,
variadic, variadic,
@ -1262,10 +1264,9 @@ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=Tr
Like _search_regex, but strips HTML tags and unescapes entities. Like _search_regex, but strips HTML tags and unescapes entities.
""" """
res = self._search_regex(pattern, string, name, default, fatal, flags, group) res = self._search_regex(pattern, string, name, default, fatal, flags, group)
if res: if isinstance(res, tuple):
return clean_html(res).strip() return tuple(map(clean_html, res))
else: return clean_html(res)
return res
def _get_netrc_login_info(self, netrc_machine=None): def _get_netrc_login_info(self, netrc_machine=None):
username = None username = None
@ -1396,10 +1397,16 @@ def _rta_search(html):
# And then there are the jokers who advertise that they use RTA, but actually don't. # And then there are the jokers who advertise that they use RTA, but actually don't.
AGE_LIMIT_MARKERS = [ AGE_LIMIT_MARKERS = [
r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>', r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
r'>[^<]*you acknowledge you are at least (\d+) years old',
r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b',
] ]
if any(re.search(marker, html) for marker in AGE_LIMIT_MARKERS):
return 18 age_limit = 0
return 0 for marker in AGE_LIMIT_MARKERS:
mobj = re.search(marker, html)
if mobj:
age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18)))
return age_limit
def _media_rating_search(self, html): def _media_rating_search(self, html):
# See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
@ -2190,13 +2197,23 @@ def _extract_m3u8_vod_duration(
return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id) return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id)
def _parse_m3u8_vod_duration(self, m3u8_vod, video_id): def _parse_m3u8_vod_duration(self, m3u8_vod, video_id):
if '#EXT-X-PLAYLIST-TYPE:VOD' not in m3u8_vod: if '#EXT-X-ENDLIST' not in m3u8_vod:
return None return None
return int(sum( return int(sum(
float(line[len('#EXTINF:'):].split(',')[0]) float(line[len('#EXTINF:'):].split(',')[0])
for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None
def _extract_mpd_vod_duration(
        self, mpd_url, video_id, note=None, errnote=None, data=None, headers={}, query={}):
    """Download an MPD manifest and return its declared presentation duration.

    The manifest is fetched non-fatally through self._download_xml; the
    'mediaPresentationDuration' attribute of the returned document is passed
    through parse_duration and then int_or_none.

    @param mpd_url   URL of the MPD manifest
    @param video_id  identifier forwarded to the download helper
    @param note      progress message; a default is used when None
    @param errnote   failure message; a default is used when None
    @param data, headers, query  forwarded unchanged to self._download_xml
    @returns         whatever int_or_none yields for the parsed duration
                     (presumably None when the manifest or attribute is missing
                     - confirm against the helpers)
    """
    if note is None:
        note = 'Downloading MPD VOD manifest'
    if errnote is None:
        errnote = 'Failed to download VOD manifest'

    manifest = self._download_xml(
        mpd_url, video_id, note=note, errnote=errnote,
        fatal=False, data=data, headers=headers, query=query)
    if not manifest:
        # Falsy result (e.g. a failed non-fatal download): fall back to an
        # empty mapping so the .get() below still works
        manifest = {}

    raw_duration = manifest.get('mediaPresentationDuration')
    return int_or_none(parse_duration(raw_duration))
@staticmethod @staticmethod
def _xpath_ns(path, namespace=None): def _xpath_ns(path, namespace=None):
if not namespace: if not namespace:
@ -2323,7 +2340,8 @@ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_para
height = int_or_none(medium.get('height')) height = int_or_none(medium.get('height'))
proto = medium.get('proto') proto = medium.get('proto')
ext = medium.get('ext') ext = medium.get('ext')
src_ext = determine_ext(src) src_ext = determine_ext(src, default_ext=None) or ext or urlhandle_detect_ext(
self._request_webpage(HEADRequest(src), video_id, note='Requesting extension info', fatal=False))
streamer = medium.get('streamer') or base streamer = medium.get('streamer') or base
if proto == 'rtmp' or streamer.startswith('rtmp'): if proto == 'rtmp' or streamer.startswith('rtmp'):
@ -3233,7 +3251,7 @@ def manifest_url(manifest):
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
mobj = re.search( mobj = re.search(
r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)', r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
webpage) webpage)
if mobj: if mobj:
try: try:
@ -3254,19 +3272,20 @@ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
# JWPlayer backward compatibility: flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
if 'playlist' not in jwplayer_data:
jwplayer_data = {'playlist': [jwplayer_data]}
entries = [] entries = []
if not isinstance(jwplayer_data, dict):
return entries
# JWPlayer backward compatibility: single playlist item playlist_items = jwplayer_data.get('playlist')
# JWPlayer backward compatibility: single playlist item/flattened playlists
# https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
if not isinstance(jwplayer_data['playlist'], list): # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
jwplayer_data['playlist'] = [jwplayer_data['playlist']] if not isinstance(playlist_items, list):
playlist_items = (playlist_items or jwplayer_data, )
for video_data in jwplayer_data['playlist']: for video_data in playlist_items:
if not isinstance(video_data, dict):
continue
# JWPlayer backward compatibility: flattened sources # JWPlayer backward compatibility: flattened sources
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
if 'sources' not in video_data: if 'sources' not in video_data:
@ -3304,6 +3323,13 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
'timestamp': int_or_none(video_data.get('pubdate')), 'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles, 'subtitles': subtitles,
'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ...
'genre': clean_html(video_data.get('genre')),
'channel': clean_html(dict_get(video_data, ('category', 'channel'))),
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
'release_year': int_or_none(video_data.get('releasedate')),
'age_limit': int_or_none(video_data.get('age_restriction')),
} }
# https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
@ -3321,7 +3347,7 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
urls = [] urls = set()
formats = [] formats = []
for source in jwplayer_sources_data: for source in jwplayer_sources_data:
if not isinstance(source, dict): if not isinstance(source, dict):
@ -3330,14 +3356,14 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
base_url, self._proto_relative_url(source.get('file'))) base_url, self._proto_relative_url(source.get('file')))
if not source_url or source_url in urls: if not source_url or source_url in urls:
continue continue
urls.append(source_url) urls.add(source_url)
source_type = source.get('type') or '' source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url) ext = mimetype2ext(source_type) or determine_ext(source_url)
if source_type == 'hls' or ext == 'm3u8': if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native', source_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=m3u8_id, fatal=False)) m3u8_id=m3u8_id, fatal=False))
elif source_type == 'dash' or ext == 'mpd': elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url:
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
source_url, video_id, mpd_id=mpd_id, fatal=False)) source_url, video_id, mpd_id=mpd_id, fatal=False))
elif ext == 'smil': elif ext == 'smil':
@ -3352,13 +3378,12 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
'ext': ext, 'ext': ext,
}) })
else: else:
format_id = str_or_none(source.get('label'))
height = int_or_none(source.get('height')) height = int_or_none(source.get('height'))
if height is None: if height is None and format_id:
# Often no height is provided but there is a label in # Often no height is provided but there is a label in
# format like "1080p", "720p SD", or 1080. # format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex( height = parse_resolution(format_id).get('height')
r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''),
'height', default=None))
a_format = { a_format = {
'url': source_url, 'url': source_url,
'width': int_or_none(source.get('width')), 'width': int_or_none(source.get('width')),
@ -3366,6 +3391,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
'tbr': int_or_none(source.get('bitrate'), scale=1000), 'tbr': int_or_none(source.get('bitrate'), scale=1000),
'filesize': int_or_none(source.get('filesize')), 'filesize': int_or_none(source.get('filesize')),
'ext': ext, 'ext': ext,
'format_id': format_id
} }
if source_url.startswith('rtmp'): if source_url.startswith('rtmp'):
a_format['ext'] = 'flv' a_format['ext'] = 'flv'
@ -3459,13 +3485,17 @@ def get_testcases(cls, include_onlymatching=False):
continue continue
t['name'] = cls.ie_key() t['name'] = cls.ie_key()
yield t yield t
if getattr(cls, '__wrapped__', None):
yield from cls.__wrapped__.get_testcases(include_onlymatching)
@classmethod @classmethod
def get_webpage_testcases(cls): def get_webpage_testcases(cls):
tests = vars(cls).get('_WEBPAGE_TESTS', []) tests = vars(cls).get('_WEBPAGE_TESTS', [])
for t in tests: for t in tests:
t['name'] = cls.ie_key() t['name'] = cls.ie_key()
return tests yield t
if getattr(cls, '__wrapped__', None):
yield from cls.__wrapped__.get_webpage_testcases()
@classproperty(cache=True) @classproperty(cache=True)
def age_limit(cls): def age_limit(cls):
@ -3511,7 +3541,7 @@ def description(cls, *, markdown=True, search_examples=None):
elif cls.IE_DESC: elif cls.IE_DESC:
desc += f' {cls.IE_DESC}' desc += f' {cls.IE_DESC}'
if cls.SEARCH_KEY: if cls.SEARCH_KEY:
desc += f'; "{cls.SEARCH_KEY}:" prefix' desc += f'{";" if cls.IE_DESC else ""} "{cls.SEARCH_KEY}:" prefix'
if search_examples: if search_examples:
_COUNTS = ('', '5', '10', 'all') _COUNTS = ('', '5', '10', 'all')
desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
@ -3727,10 +3757,12 @@ def __init_subclass__(cls, *, plugin_name=None, **kwargs):
if plugin_name: if plugin_name:
mro = inspect.getmro(cls) mro = inspect.getmro(cls)
super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] super_class = cls.__wrapped__ = mro[mro.index(cls) + 1]
cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key
cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}'
while getattr(super_class, '__wrapped__', None): while getattr(super_class, '__wrapped__', None):
super_class = super_class.__wrapped__ super_class = super_class.__wrapped__
setattr(sys.modules[super_class.__module__], super_class.__name__, cls) setattr(sys.modules[super_class.__module__], super_class.__name__, cls)
_PLUGIN_OVERRIDES[super_class].append(cls)
return super().__init_subclass__(**kwargs) return super().__init_subclass__(**kwargs)
@ -3787,3 +3819,6 @@ class UnsupportedURLIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
raise UnsupportedError(url) raise UnsupportedError(url)
_PLUGIN_OVERRIDES = collections.defaultdict(list)

View file

@ -182,7 +182,7 @@ def _real_extract(self, url):
self.to_screen( self.to_screen(
'To get all formats of a hardsub language, use ' 'To get all formats of a hardsub language, use '
'"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". ' '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta for more info', 'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
only_once=True) only_once=True)
else: else:
full_format_langs = set(map(str.lower, available_formats)) full_format_langs = set(map(str.lower, available_formats))
@ -291,7 +291,8 @@ def entries():
'season_id': episode.get('season_id'), 'season_id': episode.get('season_id'),
'season_number': episode.get('season_number'), 'season_number': episode.get('season_number'),
'episode': episode.get('title'), 'episode': episode.get('title'),
'episode_number': episode.get('sequence_number') 'episode_number': episode.get('sequence_number'),
'language': episode.get('audio_locale'),
} }
return self.playlist_result(entries(), internal_id, series_response.get('title')) return self.playlist_result(entries(), internal_id, series_response.get('title'))

View file

@ -1,4 +1,5 @@
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
@ -23,7 +24,7 @@ def _call_api(self, path, video_id, query=None):
auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token') auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
if auth_cookie: if auth_cookie:
self.write_debug('Obtained auth_token cookie') self.write_debug('Obtained auth_token cookie')
self._auth_token = auth_cookie.value self._auth_token = urllib.parse.unquote(auth_cookie.value)
if self._auth_token: if self._auth_token:
headers['X-Auth-Token'] = self._auth_token headers['X-Auth-Token'] = self._auth_token
result = self._download_json( result = self._download_json(
@ -54,8 +55,11 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
'channel': 'Curiosity Stream', 'channel': 'Curiosity Stream',
'categories': ['Technology', 'Interview'], 'categories': ['Technology', 'Interview'],
'average_rating': 96.79, 'average_rating': float,
'series_id': '2', 'series_id': '2',
'thumbnail': r're:https://img.curiositystream.com/.+\.jpg',
'tags': [],
'duration': 158
}, },
'params': { 'params': {
# m3u8 download # m3u8 download

View file

@ -78,7 +78,7 @@ def _real_extract(self, url):
'Downloading token JSON metadata', query={ 'Downloading token JSON metadata', query={
'authRel': 'authorization', 'authRel': 'authorization',
'client_id': '3020a40c2356a645b4b4', 'client_id': '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), 'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
'redirectUri': 'https://www.discovery.com/', 'redirectUri': 'https://www.discovery.com/',
})['access_token'] })['access_token']

View file

@ -2,22 +2,24 @@
import hashlib import hashlib
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
float_or_none, float_or_none,
int_or_none,
mimetype2ext, mimetype2ext,
str_or_none, str_or_none,
traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
url_or_none, url_or_none,
) )
SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s'
class DRTVIE(InfoExtractor): class DRTVIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -141,13 +143,13 @@ class DRTVIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) raw_video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, raw_video_id)
if '>Programmet er ikke længere tilgængeligt' in webpage: if '>Programmet er ikke længere tilgængeligt' in webpage:
raise ExtractorError( raise ExtractorError(
'Video %s is not available' % video_id, expected=True) 'Video %s is not available' % raw_video_id, expected=True)
video_id = self._search_regex( video_id = self._search_regex(
(r'data-(?:material-identifier|episode-slug)="([^"]+)"', (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
@ -182,6 +184,11 @@ def _real_extract(self, url):
data = self._download_json( data = self._download_json(
programcard_url, video_id, 'Downloading video JSON', query=query) programcard_url, video_id, 'Downloading video JSON', query=query)
supplementary_data = {}
if re.search(r'_\d+$', raw_video_id):
supplementary_data = self._download_json(
SERIES_API % f'/episode/{raw_video_id}', raw_video_id, fatal=False) or {}
title = str_or_none(data.get('Title')) or re.sub( title = str_or_none(data.get('Title')) or re.sub(
r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '', r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
self._og_search_title(webpage)) self._og_search_title(webpage))
@ -313,8 +320,8 @@ def decrypt_uri(e):
'season': str_or_none(data.get('SeasonTitle')), 'season': str_or_none(data.get('SeasonTitle')),
'season_number': int_or_none(data.get('SeasonNumber')), 'season_number': int_or_none(data.get('SeasonNumber')),
'season_id': str_or_none(data.get('SeasonUrn')), 'season_id': str_or_none(data.get('SeasonUrn')),
'episode': str_or_none(data.get('EpisodeTitle')), 'episode': traverse_obj(supplementary_data, ('entries', 0, 'item', 'contextualTitle')) or str_or_none(data.get('EpisodeTitle')),
'episode_number': int_or_none(data.get('EpisodeNumber')), 'episode_number': traverse_obj(supplementary_data, ('entries', 0, 'item', 'episodeNumber')) or int_or_none(data.get('EpisodeNumber')),
'release_year': int_or_none(data.get('ProductionYear')), 'release_year': int_or_none(data.get('ProductionYear')),
} }
@ -372,3 +379,92 @@ def _real_extract(self, url):
'formats': formats, 'formats': formats,
'is_live': True, 'is_live': True,
} }
class DRTVSeasonIE(InfoExtractor):
    """Playlist extractor for a DRTV season page.

    Fetches the SERIES_API page for ``/saeson/<display_id>_<id>`` and emits one
    ``url`` entry per listed episode, each delegated to DRTVIE.
    """
    IE_NAME = 'drtv:season'
    _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P<display_id>[\w-]+)_(?P<id>\d+)'
    _GEO_COUNTRIES = ['DK']
    _TESTS = [{
        'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008',
        'info_dict': {
            'id': '9008',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
        },
        'playlist_mincount': 8
    }, {
        'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761',
        'info_dict': {
            'id': '8761',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
        },
        'playlist_mincount': 19
    }]

    def _real_extract(self, url):
        """Build the season playlist from the API response for `url`."""
        display_id, season_id = self._match_valid_url(url).group('display_id', 'id')
        data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id)

        # These two values are shared by the playlist and every entry;
        # look them up once instead of once per episode
        series_title = traverse_obj(data, ('entries', 0, 'item', 'title'))
        season_number = traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))

        # traverse_obj yields None when the episode list is absent; iterating
        # that directly would raise TypeError, so fall back to an empty list
        episodes = traverse_obj(data, ('entries', 0, 'item', 'episodes', 'items')) or []

        entries = []
        for episode in episodes:
            # An item without a path cannot be turned into an episode URL
            if not isinstance(episode, dict) or not episode.get('path'):
                continue
            entries.append({
                '_type': 'url',
                'url': f'https://www.dr.dk/drtv{episode["path"]}',
                'ie_key': DRTVIE.ie_key(),
                'title': episode.get('title'),
                'episode': episode.get('episodeName'),
                'description': episode.get('shortDescription'),
                'series': series_title,
                'season_number': season_number,
                'episode_number': episode.get('episodeNumber'),
            })

        return {
            '_type': 'playlist',
            'id': season_id,
            'display_id': display_id,
            'title': series_title,
            'series': series_title,
            'entries': entries,
            'season_number': season_number
        }
class DRTVSeriesIE(InfoExtractor):
    """Playlist extractor for a DRTV series page.

    Fetches the SERIES_API page for ``/serie/<display_id>_<id>`` and emits one
    ``url`` entry per listed season, each delegated to DRTVSeasonIE.
    """
    IE_NAME = 'drtv:series'
    _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P<display_id>[\w-]+)_(?P<id>\d+)'
    _GEO_COUNTRIES = ['DK']
    _TESTS = [{
        'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954',
        'info_dict': {
            'id': '6954',
            'display_id': 'frank-and-kastaniegaarden',
            'title': 'Frank & Kastaniegaarden',
            'series': 'Frank & Kastaniegaarden',
        },
        'playlist_mincount': 15
    }]

    def _real_extract(self, url):
        """Build the series playlist from the API response for `url`."""
        display_id, series_id = self._match_valid_url(url).group('display_id', 'id')
        data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id)

        series_title = traverse_obj(data, ('entries', 0, 'item', 'title'))
        # NOTE(review): this is the page-level seasonNumber and is attached to
        # every season entry unchanged - confirm whether a per-season value
        # from each list item was intended
        season_number = traverse_obj(data, ('entries', 0, 'item', 'seasonNumber'))

        # traverse_obj yields None when the season list is absent; iterating
        # that directly would raise TypeError, so fall back to an empty list
        seasons = traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items')) or []

        entries = []
        for season in seasons:
            # Without a path the URL would be built as '.../drtvNone'
            if not isinstance(season, dict) or not season.get('path'):
                continue
            entries.append({
                '_type': 'url',
                'url': f'https://www.dr.dk/drtv{season["path"]}',
                'ie_key': DRTVSeasonIE.ie_key(),
                'title': season.get('title'),
                'series': series_title,
                'season_number': season_number
            })

        return {
            '_type': 'playlist',
            'id': series_id,
            'display_id': display_id,
            'title': series_title,
            'series': series_title,
            'entries': entries
        }

View file

@ -1,24 +1,80 @@
import re import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urllib_parse_unquote from .youtube import YoutubeTabIE
from ..utils import parse_qs, smuggle_url, traverse_obj
class EmbedlyIE(InfoExtractor): class EmbedlyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
'info_dict': {
'id': 'UUGLim4T2loE5rwCMdpCIPVg',
'modified_date': '20221225',
'view_count': int,
'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
'uploader': 'TraciJHines',
'channel_url': 'https://www.youtube.com/@TraciHinesMusic',
'channel': 'TraciJHines',
'availability': 'public',
'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg',
'description': '',
'tags': [],
'title': 'Uploads from TraciJHines',
},
'playlist_mincount': 10,
}, {
'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
'params': {'noplaylist': True},
'info_dict': {
'id': 'SU4fj_aEMVw',
'ext': 'mp4',
'title': 'I\'m on Patreon!',
'age_limit': 0,
'categories': ['Entertainment'],
'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp',
'live_status': 'not_live',
'playable_in_embed': True,
'channel': 'TraciJHines',
'uploader_id': 'TraciJHines',
'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
'uploader_url': 'http://www.youtube.com/user/TraciJHines',
'upload_date': '20150211',
'duration': 282,
'availability': 'public',
'channel_follower_count': int,
'tags': 'count:39',
'view_count': int,
'comment_count': int,
'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
'like_count': int,
'uploader': 'TraciJHines',
'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
'chapters': list,
},
}, {
'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh',
'only_matching': True, 'only_matching': True,
}] }]
@classmethod @classmethod
def _extract_embed_urls(cls, url, webpage): def _extract_from_webpage(cls, url, webpage):
# Bypass suitable check # Bypass "ie=cls" and suitable check
for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage): for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage):
yield mobj.group('url') yield cls.url_result(mobj.group('url'))
for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage): for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage):
yield urllib.parse.unquote(mobj.group('url')) yield cls.url_result(urllib.parse.unquote(mobj.group('url')))
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) qs = parse_qs(url)
src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '')
if src and YoutubeTabIE.suitable(src):
return self.url_result(src, YoutubeTabIE)
return self.url_result(smuggle_url(
urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
{'http_headers': {'Referer': url}}))

View file

@ -1,10 +1,10 @@
import contextlib import contextlib
import os import os
from ..utils import load_plugins from ..plugins import load_plugins
# NB: Must be before other imports so that plugins can be correctly injected # NB: Must be before other imports so that plugins can be correctly injected
_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {}) _PLUGIN_CLASSES = load_plugins('extractor', 'IE')
_LAZY_LOADER = False _LAZY_LOADER = False
if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
@ -24,3 +24,5 @@
globals().update(_PLUGIN_CLASSES) globals().update(_PLUGIN_CLASSES)
_ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values()
from .common import _PLUGIN_OVERRIDES # noqa: F401

View file

@ -17,8 +17,10 @@ class FifaIE(InfoExtractor):
'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b',
'ext': 'mp4', 'ext': 'mp4',
'categories': ['FIFA Tournaments'], 'categories': ['FIFA Tournaments'],
'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero',
'duration': 8165, 'duration': 8165,
'release_timestamp': 1152403200,
'release_date': '20060709',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -54,7 +56,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
preconnect_link = self._search_regex( preconnect_link = self._search_regex(
r'<link[^>]+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') r'<link\b[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link')
video_details = self._download_json( video_details = self._download_json(
f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False)
@ -62,22 +64,9 @@ def _real_extract(self, url):
preplay_parameters = self._download_json( preplay_parameters = self._download_json(
f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters']
cid = preplay_parameters['contentId']
content_data = self._download_json( content_data = self._download_json(
f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters),
'v': preplay_parameters['preplayAPIVersion'], video_id, 'Downloading Content Data')
'tc': preplay_parameters['tokenCheckAlgorithmVersion'],
'rn': preplay_parameters['randomNumber'],
'exp': preplay_parameters['tokenExpirationDate'],
'ct': preplay_parameters['contentType'],
'cid': cid,
'mbtracks': preplay_parameters['tracksAssetNumber'],
'ad': preplay_parameters['adConfiguration'],
'ad.preroll': int(preplay_parameters['adPreroll']),
'ad.cmsid': preplay_parameters['adCMSSourceId'],
'ad.vid': preplay_parameters['adSourceVideoID'],
'sig': preplay_parameters['signature'],
})
formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)

View file

@ -210,7 +210,7 @@ def _real_extract(self, url):
page = self._download_json( page = self._download_json(
'https://www.funimation.com/api/showexperience/%s/' % experience_id, 'https://www.funimation.com/api/showexperience/%s/' % experience_id,
display_id, headers=headers, expected_status=403, query={ display_id, headers=headers, expected_status=403, query={
'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), 'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)),
}, note=f'Downloading {format_name} JSON') }, note=f'Downloading {format_name} JSON')
sources = page.get('items') or [] sources = page.get('items') or []
if not sources: if not sources:

View file

@ -32,6 +32,7 @@
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
url_or_none, url_or_none,
urljoin,
variadic, variadic,
xpath_attr, xpath_attr,
xpath_text, xpath_text,
@ -1867,11 +1868,13 @@ class GenericIE(InfoExtractor):
'display_id': 'kelis-4th-of-july', 'display_id': 'kelis-4th-of-july',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Kelis - 4th Of July', 'title': 'Kelis - 4th Of July',
'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', 'description': 'Kelis - 4th Of July',
'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Untested major version'],
}, { }, {
# KVS Player # KVS Player
'url': 'https://www.kvs-demo.com/embed/105/', 'url': 'https://www.kvs-demo.com/embed/105/',
@ -1880,35 +1883,12 @@ class GenericIE(InfoExtractor):
'display_id': 'kelis-4th-of-july', 'display_id': 'kelis-4th-of-july',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Kelis - 4th Of July / Embed Player', 'title': 'Kelis - 4th Of July / Embed Player',
'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
# KVS Player
'url': 'https://thisvid.com/videos/french-boy-pantsed/',
'md5': '3397979512c682f6b85b3b04989df224',
'info_dict': {
'id': '2400174',
'display_id': 'french-boy-pantsed',
'ext': 'mp4',
'title': 'French Boy Pantsed - ThisVid.com',
'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
}
}, {
# KVS Player
'url': 'https://thisvid.com/embed/2400174/',
'md5': '3397979512c682f6b85b3b04989df224',
'info_dict': {
'id': '2400174',
'display_id': 'french-boy-pantsed',
'ext': 'mp4',
'title': 'French Boy Pantsed - ThisVid.com',
'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
}
}, {
# KVS Player
'url': 'https://youix.com/video/leningrad-zoj/', 'url': 'https://youix.com/video/leningrad-zoj/',
'md5': '94f96ba95706dc3880812b27b7d8a2b8', 'md5': '94f96ba95706dc3880812b27b7d8a2b8',
'info_dict': { 'info_dict': {
@ -1916,8 +1896,8 @@ class GenericIE(InfoExtractor):
'display_id': 'leningrad-zoj', 'display_id': 'leningrad-zoj',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com',
'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
} },
}, { }, {
# KVS Player # KVS Player
'url': 'https://youix.com/embed/18485', 'url': 'https://youix.com/embed/18485',
@ -1927,19 +1907,20 @@ class GenericIE(InfoExtractor):
'display_id': 'leningrad-zoj', 'display_id': 'leningrad-zoj',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ленинград - ЗОЖ', 'title': 'Ленинград - ЗОЖ',
'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg',
} },
}, { }, {
# KVS Player # KVS Player
'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/',
'md5': '94166bdb26b4cb1fb9214319a629fc51', 'md5': '94166bdb26b4cb1fb9214319a629fc51',
'info_dict': { 'info_dict': {
'id': '21217', 'id': '21217',
'display_id': '40-nochey-40-nights-2016', 'display_id': '40-nochey-2016',
'ext': 'mp4', 'ext': 'mp4',
'title': '40 ночей (2016) - BogMedia.org', 'title': '40 ночей (2016) - BogMedia.org',
'description': 'md5:4e6d7d622636eb7948275432eb256dc3',
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
} },
}, },
{ {
# KVS Player (for sites that serve kt_player.js via non-https urls) # KVS Player (for sites that serve kt_player.js via non-https urls)
@ -1950,8 +1931,8 @@ class GenericIE(InfoExtractor):
'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg', 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg',
} },
}, },
{ {
# Reddit-hosted video that will redirect and be processed by RedditIE # Reddit-hosted video that will redirect and be processed by RedditIE
@ -2154,7 +2135,52 @@ class GenericIE(InfoExtractor):
'age_limit': 0, 'age_limit': 0,
'direct': True, 'direct': True,
} }
} },
{
'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
'info_dict': {
'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
'ext': 'mp4',
'title': 'čauky lidi 70 finall',
'description': 'čauky lidi 70 finall',
'thumbnail': 'h',
'upload_date': '20220606',
'timestamp': 1654513791,
'duration': 318.0,
'direct': True,
'age_limit': 0,
},
},
{
'note': 'JW Player embed with unicode-escape sequences in URL',
'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
'info_dict': {
'id': 'm',
'ext': 'mp4',
'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
'description': 'Mahler\'s ',
'uploader': 'www.medici.tv',
'age_limit': 0,
'thumbnail': r're:^https?://.+\.jpg',
},
'params': {
'skip_download': True,
},
},
{
'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
'md5': 'e2f0a4c329f7986280b7328e24036d60',
'info_dict': {
'id': '284002',
'display_id': 'just-out-of-the-shower-joi',
'ext': 'mp4',
'title': 'Just Out Of The Shower JOI - Shooshtime',
'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg',
'height': 720,
'age_limit': 18,
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2220,43 +2246,87 @@ def itunes(key):
'entries': entries, 'entries': entries,
} }
def _kvs_getrealurl(self, video_url, license_code): @classmethod
def _kvs_get_real_url(cls, video_url, license_code):
if not video_url.startswith('function/0/'): if not video_url.startswith('function/0/'):
return video_url # not obfuscated return video_url # not obfuscated
url_path, _, url_query = video_url.partition('?') parsed = urllib.parse.urlparse(video_url[len('function/0/'):])
urlparts = url_path.split('/')[2:] license = cls._kvs_get_license_token(license_code)
license = self._kvs_getlicensetoken(license_code) urlparts = parsed.path.split('/')
newmagic = urlparts[5][:32]
for o in range(len(newmagic) - 1, -1, -1): HASH_LENGTH = 32
new = '' hash = urlparts[3][:HASH_LENGTH]
l = (o + sum(int(n) for n in license[o:])) % 32 indices = list(range(HASH_LENGTH))
for i in range(0, len(newmagic)): # Swap indices of hash according to the destination calculated from the license token
if i == o: accum = 0
new += newmagic[l] for src in reversed(range(HASH_LENGTH)):
elif i == l: accum += license[src]
new += newmagic[o] dest = (src + accum) % HASH_LENGTH
else: indices[src], indices[dest] = indices[dest], indices[src]
new += newmagic[i]
newmagic = new
urlparts[5] = newmagic + urlparts[5][32:] urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:]
return '/'.join(urlparts) + '?' + url_query return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts)))
def _kvs_getlicensetoken(self, license): @staticmethod
modlicense = license.replace('$', '').replace('0', '1') def _kvs_get_license_token(license):
center = int(len(modlicense) / 2) license = license.replace('$', '')
license_values = [int(char) for char in license]
modlicense = license.replace('0', '1')
center = len(modlicense) // 2
fronthalf = int(modlicense[:center + 1]) fronthalf = int(modlicense[:center + 1])
backhalf = int(modlicense[center:]) backhalf = int(modlicense[center:])
modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1]
modlicense = str(4 * abs(fronthalf - backhalf)) return [
retval = '' (license_values[index + offset] + current) % 10
for o in range(0, center + 1): for index, current in enumerate(map(int, modlicense))
for i in range(1, 5): for offset in range(4)
retval += str((int(license[o + i]) + int(modlicense[o])) % 10) ]
return retval
def _extract_kvs(self, url, webpage, video_id):
    """Build an info dict from the ``flashvars`` object of a KVS player page.

    url      -- page URL; relative format URLs are joined against it, it is
                sent as the Referer of every format, and its scheme is
                reused for protocol-relative thumbnails
    webpage  -- HTML of the page that contains ``var flashvars = {...}``
    video_id -- identifier handed to ``self._search_json``

    Returns a dict with the keys ``id``, ``display_id``, ``title``,
    ``thumbnail`` and ``formats``.
    """
    flashvars = self._search_json(
        r'(?s:<script\b[^>]*>.*?var\s+flashvars\s*=)',
        webpage, 'flashvars', video_id, transform_source=js_to_json)

    # The display_id is the trailing path component of the canonical
    # <link>; both attribute orders (href first / rel first) are accepted.
    canonical_re = (
        r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
        r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)')
    display_id = self._search_regex(canonical_re, webpage, 'display_id', fatal=False)
    title = self._html_search_regex(
        r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')

    thumbnail = flashvars['preview_url']
    if thumbnail.startswith('//'):
        # Protocol-relative thumbnail: prepend the page URL's "scheme:" part
        thumbnail = url.partition('/')[0] + thumbnail

    is_video_key = re.compile(r'^video_(?:url|alt_url\d*)$').match
    url_keys = [key for key in flashvars if is_video_key(key)]

    formats = []
    for key in url_keys:
        source = flashvars[key]
        if '/get_file/' not in source:
            continue
        # "<key>_text" is used as the format label when present
        format_id = flashvars.get(f'{key}_text', key)
        fmt = {
            'url': urljoin(url, self._kvs_get_real_url(source, flashvars['license_code'])),
            'format_id': format_id,
            'ext': 'mp4',
            **(parse_resolution(format_id) or parse_resolution(source)),
            'http_headers': {'Referer': url},
        }
        # Formats for which no height could be parsed get quality 1
        if not fmt.get('height'):
            fmt['quality'] = 1
        formats.append(fmt)

    return {
        'id': flashvars['video_id'],
        'display_id': display_id,
        'title': title,
        'thumbnail': thumbnail,
        'formats': formats,
    }
def _real_extract(self, url): def _real_extract(self, url):
if url.startswith('//'): if url.startswith('//'):
@ -2312,7 +2382,7 @@ def _real_extract(self, url):
# It may probably better to solve this by checking Content-Type for application/octet-stream # It may probably better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this. # after a HEAD request, but not sure if we can rely on this.
full_response = self._request_webpage(url, video_id, headers={ full_response = self._request_webpage(url, video_id, headers={
'Accept-Encoding': '*', 'Accept-Encoding': 'identity',
**smuggled_data.get('http_headers', {}) **smuggled_data.get('http_headers', {})
}) })
new_url = full_response.geturl() new_url = full_response.geturl()
@ -2565,6 +2635,17 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
self.report_detected('video.js embed') self.report_detected('video.js embed')
return [{'formats': formats, 'subtitles': subtitles}] return [{'formats': formats, 'subtitles': subtitles}]
# Look for generic KVS player (before json-ld bc of some urls that break otherwise)
found = self._search_regex((
r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
), webpage, 'KVS player', group='ver', default=False)
if found:
self.report_detected('KWS Player')
if found.split('.')[0] not in ('4', '5', '6'):
self.report_warning(f'Untested major version ({found}) in player engine - download may fail.')
return [self._extract_kvs(url, webpage, video_id)]
# Looking for http://schema.org/VideoObject # Looking for http://schema.org/VideoObject
json_ld = self._search_json_ld(webpage, video_id, default={}) json_ld = self._search_json_ld(webpage, video_id, default={})
if json_ld.get('url') not in (url, None): if json_ld.get('url') not in (url, None):
@ -2607,52 +2688,6 @@ def filter_video(urls):
['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
if found: if found:
self.report_detected('JW Player embed') self.report_detected('JW Player embed')
if not found:
# Look for generic KVS player
found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
if found:
self.report_detected('KWS Player')
if found.group('maj_ver') not in ['4', '5']:
self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
flashvars = re.search(r'(?ms)<script.*?>.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
# extract the part after the last / as the display_id from the
# canonical URL.
display_id = self._search_regex(
r'(?:<link href="https?://[^"]+/(.+?)/?" rel="canonical"\s*/?>'
r'|<link rel="canonical" href="https?://[^"]+/(.+?)/?"\s*/?>)',
webpage, 'display_id', fatal=False
)
title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
thumbnail = flashvars['preview_url']
if thumbnail.startswith('//'):
protocol, _, _ = url.partition('/')
thumbnail = protocol + thumbnail
url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys()))
formats = []
for key in url_keys:
if '/get_file/' not in flashvars[key]:
continue
format_id = flashvars.get(f'{key}_text', key)
formats.append({
'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
'format_id': format_id,
'ext': 'mp4',
**(parse_resolution(format_id) or parse_resolution(flashvars[key]))
})
if not formats[-1].get('height'):
formats[-1]['quality'] = 1
return [{
'id': flashvars['video_id'],
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}]
if not found: if not found:
# Broaden the search a little bit # Broaden the search a little bit
found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
@ -2733,6 +2768,7 @@ def filter_video(urls):
entries = [] entries = []
for video_url in orderedSet(found): for video_url in orderedSet(found):
video_url = video_url.encode().decode('unicode-escape')
video_url = unescapeHTML(video_url) video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/') video_url = video_url.replace('\\/', '/')
video_url = urllib.parse.urljoin(url, video_url) video_url = urllib.parse.urljoin(url, video_url)

View file

@ -527,11 +527,14 @@ def _extract_vms_player_js(self, webpage, video_id):
webpack_js_url = self._proto_relative_url(self._search_regex( webpack_js_url = self._proto_relative_url(self._search_regex(
r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL')) r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS') webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex( webpack_map = self._search_json(
r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))] r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
for module_index in reversed(list(webpack_map2.keys())): contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
for module_index in reversed(webpack_map):
module_js = self._download_webpage( module_js = self._download_webpage(
f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js', f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or '' video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js: if 'vms request' in module_js:
self.cache.store('iq', 'player_js', module_js) self.cache.store('iq', 'player_js', module_js)
@ -543,11 +546,11 @@ def _extract_cmd5x_function(self, webpage, video_id):
self._extract_vms_player_js(webpage, video_id), 'signature function') self._extract_vms_player_js(webpage, video_id), 'signature function')
def _update_bid_tags(self, webpage, video_id): def _update_bid_tags(self, webpage, video_id):
extracted_bid_tags = self._parse_json( extracted_bid_tags = self._search_json(
self._search_regex( r'function\s*\([^)]*\)\s*\{\s*"use strict";?\s*var \w\s*=\s*',
r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd', self._extract_vms_player_js(webpage, video_id), 'video tags', video_id,
self._extract_vms_player_js(webpage, video_id), 'video tags', default=''), contains_pattern=r'{\s*\d+\s*:\s*\{\s*nbid\s*:.+}\s*}',
video_id, transform_source=js_to_json, fatal=False) end_pattern=r'\s*,\s*\w\s*=\s*\{\s*getNewVd', fatal=False, transform_source=js_to_json)
if not extracted_bid_tags: if not extracted_bid_tags:
return return
self._BID_TAGS = { self._BID_TAGS = {

View file

@ -23,9 +23,19 @@ class JojIE(InfoExtractor):
'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
'ext': 'mp4', 'ext': 'mp4',
'title': 'NOVÉ BÝVANIE', 'title': 'NOVÉ BÝVANIE',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*?$',
'duration': 3118, 'duration': 3118,
} }
}, {
'url': 'https://media.joj.sk/embed/CSM0Na0l0p1',
'info_dict': {
'id': 'CSM0Na0l0p1',
'ext': 'mp4',
'height': 576,
'title': 'Extrémne rodiny 2 - POKRAČOVANIE (2012/04/09 21:30:00)',
'duration': 3937,
'thumbnail': r're:^https?://.*?$',
}
}, { }, {
'url': 'https://media.joj.sk/embed/9i1cxv', 'url': 'https://media.joj.sk/embed/9i1cxv',
'only_matching': True, 'only_matching': True,
@ -43,10 +53,10 @@ def _real_extract(self, url):
webpage = self._download_webpage( webpage = self._download_webpage(
'https://media.joj.sk/embed/%s' % video_id, video_id) 'https://media.joj.sk/embed/%s' % video_id, video_id)
title = self._search_regex( title = (self._search_json(r'videoTitle\s*:', webpage, 'title', video_id,
(r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1', contains_pattern=r'["\'].+["\']', default=None)
r'<title>(?P<title>[^<]+)'), webpage, 'title', or self._html_extract_title(webpage, default=None)
default=None, group='title') or self._og_search_title(webpage) or self._og_search_title(webpage))
bitrates = self._parse_json( bitrates = self._parse_json(
self._search_regex( self._search_regex(
@ -58,11 +68,13 @@ def _real_extract(self, url):
for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []: for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
if isinstance(format_url, compat_str): if isinstance(format_url, compat_str):
height = self._search_regex( height = self._search_regex(
r'(\d+)[pP]\.', format_url, 'height', default=None) r'(\d+)[pP]|(pal)\.', format_url, 'height', default=None)
if height == 'pal':
height = 576
formats.append({ formats.append({
'url': format_url, 'url': format_url,
'format_id': format_field(height, None, '%sp'), 'format_id': format_field(height, None, '%sp'),
'height': int(height), 'height': int_or_none(height),
}) })
if not formats: if not formats:
playlist = self._download_xml( playlist = self._download_xml(

View file

@ -0,0 +1,48 @@
import time
import random
import string
import hashlib
import urllib.parse
from .common import InfoExtractor
class KankaNewsIE(InfoExtractor):
    """Extractor for kankanews.com article pages (``/a/<date>/<id>.shtml``)."""

    _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
    _TESTS = [{
        'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
        'md5': '05e126513c74b1258d657452a6f4eef9',
        'info_dict': {
            'id': '4485057',
            'url': 'http://mediaplay.kksmg.com/2022/11/08/h264_450k_mp4_1a388ad771e0e4cc28b0da44d245054e_ncm.mp4',
            'ext': 'mp4',
            'title': '视频第23个中国记者节我们在进博切蛋糕',
            'thumbnail': r're:^https?://.*\.jpg*',
        }
    }]

    def _real_extract(self, url):
        """Read the ``omsid`` from the page and query the signed getvideo API."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(r'omsid\s*=\s*"(\d+)"', webpage, 'video id')

        nonce = ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
        # Key order is significant: the signature below is computed over the
        # urlencoded parameters exactly in this insertion order.
        params = {
            'nonce': nonce,
            'omsid': video_id,
            'platform': 'pc',
            'timestamp': int(time.time()),
            'version': '1.0',
        }

        # sign = md5(md5(<urlencoded params> + '&' + <fixed key>)), both as hex digests
        payload = urllib.parse.urlencode(params) + '&28c8edde3d61a0411511d3b1866f0636'
        inner_digest = hashlib.md5(payload.encode()).hexdigest()
        params['sign'] = hashlib.md5(inner_digest.encode()).hexdigest()

        response = self._download_json(
            'https://api-app.kankanews.com/kankan/pc/getvideo', video_id, query=params)
        meta = response['result']['video']

        return {
            'id': video_id,
            'url': meta['videourl'],
            'title': self._search_regex(r'g\.title\s*=\s*"([^"]+)"', webpage, 'title'),
            'thumbnail': meta.get('titlepic'),
        }

127
yt_dlp/extractor/kick.py Normal file
View file

@ -0,0 +1,127 @@
from .common import InfoExtractor
from ..utils import (
HEADRequest,
UserNotLive,
float_or_none,
merge_dicts,
str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
)
class KickBaseIE(InfoExtractor):
    """Shared session setup and API access for the kick.com extractors."""

    def _real_initialize(self):
        """Issue a HEAD request to kick.com and derive API headers from its XSRF cookie.

        The resulting headers are stored on ``KickBaseIE`` itself (not on the
        instance); they are empty when no ``XSRF-TOKEN`` cookie is found.
        """
        self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session')
        xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')

        if xsrf_token:
            api_headers = {
                'Authorization': f'Bearer {xsrf_token.value}',
                'X-XSRF-TOKEN': xsrf_token.value,
            }
        else:
            self.write_debug('kick.com did not set XSRF-TOKEN cookie')
            api_headers = {}
        KickBaseIE._API_HEADERS = api_headers

    def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
        """Download JSON from ``https://kick.com/api/v1/<path>``.

        ``headers`` is combined with the stored API headers via ``merge_dicts``;
        remaining keyword arguments are forwarded to ``self._download_json``.
        """
        api_url = f'https://kick.com/api/v1/{path}'
        request_headers = merge_dicts(headers, self._API_HEADERS)
        return self._download_json(
            api_url, display_id, note=note, headers=request_headers, **kwargs)
class KickIE(KickBaseIE):
    """Extractor for kick.com channel pages (live streams)."""

    _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'https://kick.com/yuppy',
        'info_dict': {
            'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
            'ext': 'mp4',
            'title': str,
            'description': str,
            'channel': 'yuppy',
            'channel_id': '33538',
            'uploader': 'Yuppy',
            'uploader_id': '33793',
            'upload_date': str,
            'live_status': 'is_live',
            'timestamp': int,
            'thumbnail': r're:^https?://.*\.jpg',
            'categories': list,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://kick.com/kmack710',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for a channel's current livestream.

        Raises UserNotLive when the channel API response carries no
        ``livestream`` dict.
        """
        channel = self._match_id(url)
        response = self._call_api(f'channels/{channel}', channel)

        livestream = traverse_obj(response, 'livestream', expected_type=dict)
        if not livestream:
            raise UserNotLive(video_id=channel)

        # Prefer the livestream slug, then its id; fall back to the channel name
        stream_id = traverse_obj(
            response, ('livestream', ('slug', 'id')), get_all=False, default=channel)
        formats = self._extract_m3u8_formats(
            response['playback_url'], channel, 'mp4', live=True)
        title = traverse_obj(
            response, ('livestream', ('session_title', 'slug')), get_all=False, default='')
        created_at = traverse_obj(response, ('livestream', 'created_at'))

        return {
            'id': str(stream_id),
            'formats': formats,
            'title': title,
            'description': traverse_obj(response, ('user', 'bio')),
            'channel': channel,
            'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
            'uploader': traverse_obj(response, 'name', ('user', 'username')),
            'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
            'is_live': True,
            'timestamp': unified_timestamp(created_at),
            'thumbnail': traverse_obj(
                response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
            'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
        }
class KickVODIE(KickBaseIE):
    """Extractor for kick.com ``/video/<uuid>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
        'md5': '73691206a6a49db25c5aa1588e6538fc',
        'info_dict': {
            'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
            'ext': 'mp4',
            'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links',
            'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f',
            'channel': 'kmack710',
            'channel_id': '16278',
            'uploader': 'Kmack710',
            'uploader_id': '16412',
            'upload_date': '20221206',
            'timestamp': 1670318289,
            'duration': 40104.0,
            'thumbnail': r're:^https?://.*\.jpg',
            'categories': ['Grand Theft Auto V'],
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a video, using the ``video/<id>`` API response."""
        video_id = self._match_id(url)
        response = self._call_api(f'video/{video_id}', video_id)

        # Common path prefixes inside the API response
        stream = ('livestream',)
        stream_channel = stream + ('channel',)

        formats = self._extract_m3u8_formats(response['source'], video_id, 'mp4')
        title = traverse_obj(
            response, stream + (('session_title', 'slug'),), get_all=False, default='')
        duration_ms = traverse_obj(response, stream + ('duration',))

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': traverse_obj(response, stream_channel + ('user', 'bio')),
            'channel': traverse_obj(response, stream_channel + ('slug',)),
            'channel_id': str_or_none(traverse_obj(response, stream_channel + ('id',))),
            'uploader': traverse_obj(response, stream_channel + ('user', 'username')),
            'uploader_id': str_or_none(traverse_obj(response, stream_channel + ('user_id',))),
            'timestamp': unified_timestamp(response.get('created_at')),
            # scale=1000 is applied to the reported duration by float_or_none
            'duration': float_or_none(duration_ms, scale=1000),
            'thumbnail': traverse_obj(
                response, stream + ('thumbnail',), expected_type=url_or_none),
            'categories': traverse_obj(response, stream + ('categories', ..., 'name')),
        }

View file

@ -75,9 +75,8 @@ class LinuxAcademyIE(InfoExtractor):
def _perform_login(self, username, password): def _perform_login(self, username, password):
def random_string(): def random_string():
return ''.join([ return ''.join(random.choices(
random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32))
for _ in range(32)])
webpage, urlh = self._download_webpage_handle( webpage, urlh = self._download_webpage_handle(
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={

View file

@ -1,8 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import UserNotLive, traverse_obj
ExtractorError,
traverse_obj,
)
class MixchIE(InfoExtractor): class MixchIE(InfoExtractor):
@ -33,7 +30,7 @@ def _real_extract(self, url):
initial_js_state = self._parse_json(self._search_regex( initial_js_state = self._parse_json(self._search_regex(
r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id) r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id)
if not initial_js_state.get('liveInfo'): if not initial_js_state.get('liveInfo'):
raise ExtractorError('Livestream has ended.', expected=True) raise UserNotLive(video_id=video_id)
return { return {
'id': video_id, 'id': video_id,
@ -45,7 +42,8 @@ def _real_extract(self, url):
'uploader_id': video_id, 'uploader_id': video_id,
'formats': [{ 'formats': [{
'format_id': 'hls', 'format_id': 'hls',
'url': traverse_obj(initial_js_state, ('liveInfo', 'hls')) or 'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_%s.m3u8' % video_id, 'url': (traverse_obj(initial_js_state, ('liveInfo', 'hls'))
or f'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_{video_id}.m3u8'),
'ext': 'mp4', 'ext': 'mp4',
'protocol': 'm3u8', 'protocol': 'm3u8',
}], }],

View file

@ -8,24 +8,26 @@
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest,
RegexNotFoundError,
UserNotLive,
clean_html,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
RegexNotFoundError,
smuggle_url, smuggle_url,
str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unified_strdate, unescapeHTML,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
url_basename, url_basename,
variadic, xpath_attr,
) )
class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))' _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
_TESTS = [ _TESTS = [
{ {
@ -38,10 +40,18 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'timestamp': 1424246400, 'timestamp': 1424246400,
'upload_date': '20150218', 'upload_date': '20150218',
'uploader': 'NBCU-COM', 'uploader': 'NBCU-COM',
'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'episode_number': 86,
'season': 'Season 2',
'season_number': 2,
'series': 'Tonight Show: Jimmy Fallon',
'duration': 237.0,
'chapters': 'count:1',
'tags': 'count:4',
'thumbnail': r're:https?://.+\.jpg',
}, },
'params': { 'params': {
# m3u8 download 'skip_download': 'm3u8',
'skip_download': True,
}, },
}, },
{ {
@ -55,11 +65,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'upload_date': '20141206', 'upload_date': '20141206',
'uploader': 'NBCU-COM', 'uploader': 'NBCU-COM',
}, },
'params': { 'skip': 'page not found',
# m3u8 download
'skip_download': True,
},
'skip': 'Only works from US',
}, },
{ {
# HLS streams requires the 'hdnea3' cookie # HLS streams requires the 'hdnea3' cookie
@ -73,10 +79,59 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'upload_date': '20090315', 'upload_date': '20090315',
'uploader': 'NBCU-COM', 'uploader': 'NBCU-COM',
}, },
'params': { 'skip': 'page not found',
'skip_download': True, },
{
# manifest url does not have extension
'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
'info_dict': {
'id': '3646439',
'ext': 'mp4',
'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
'episode_number': 1,
'season': 'Season 75',
'season_number': 75,
'series': 'The Golden Globe Awards',
'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
'uploader': 'NBCU-COM',
'upload_date': '20180107',
'timestamp': 1515312000,
'duration': 570.0,
'tags': 'count:8',
'thumbnail': r're:https?://.+\.jpg',
'chapters': 'count:1',
},
'params': {
'skip_download': 'm3u8',
},
},
{
# new video_id format
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
'info_dict': {
'id': 'NBCE125189978',
'ext': 'mp4',
'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
'uploader': 'NBCU-COM',
'series': 'Quantum Leap',
'season': 'Season 1',
'season_number': 1,
'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
'episode_number': 1,
'duration': 170.171,
'chapters': [],
'timestamp': 1663956155,
'upload_date': '20220923',
'tags': 'count:10',
'age_limit': 0,
'thumbnail': r're:https?://.+\.jpg',
},
'expected_warnings': ['Ignoring subtitle tracks'],
'params': {
'skip_download': 'm3u8',
}, },
'skip': 'Only works from US',
}, },
{ {
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
@ -136,6 +191,7 @@ def _real_extract(self, url):
query = { query = {
'mbr': 'true', 'mbr': 'true',
'manifest': 'm3u', 'manifest': 'm3u',
'switch': 'HLSServiceSecure',
} }
video_id = video_data['mpxGuid'] video_id = video_data['mpxGuid']
tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id) tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id)
@ -599,21 +655,22 @@ class NBCStationsIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/', 'url': 'https://www.nbclosangeles.com/news/local/large-structure-fire-in-downtown-la-prompts-smoke-odor-advisory/2968618/',
'md5': '462041d91bd762ef5a38b7d85d6dc18f',
'info_dict': { 'info_dict': {
'id': '2968618', 'id': '2968618',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory', 'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
'description': None, 'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
'timestamp': 1661135892, 'timestamp': 1661135892,
'upload_date': '20220821', 'upload_date': '20220822',
'uploader': 'NBC 4', 'uploader': 'NBC 4',
'uploader_id': 'KNBC', 'channel_id': 'KNBC',
'channel': 'nbclosangeles', 'channel': 'nbclosangeles',
}, },
'params': {
'skip_download': 'm3u8',
},
}, { }, {
'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/', 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
'md5': '0917dcf7885be1023a9220630d415f67',
'info_dict': { 'info_dict': {
'id': '2247002', 'id': '2247002',
'ext': 'mp4', 'ext': 'mp4',
@ -622,9 +679,12 @@ class NBCStationsIE(InfoExtractor):
'timestamp': 1660886507, 'timestamp': 1660886507,
'upload_date': '20220819', 'upload_date': '20220819',
'uploader': 'Telemundo Arizona', 'uploader': 'Telemundo Arizona',
'uploader_id': 'KTAZ', 'channel_id': 'KTAZ',
'channel': 'telemundoarizona', 'channel': 'telemundoarizona',
}, },
'params': {
'skip_download': 'm3u8',
},
}] }]
_RESOLUTIONS = { _RESOLUTIONS = {
@ -643,48 +703,39 @@ def _real_extract(self, url):
r'<script>var\s*nbc\s*=', webpage, 'NBC JSON data', video_id) r'<script>var\s*nbc\s*=', webpage, 'NBC JSON data', video_id)
pdk_acct = nbc_data.get('pdkAcct') or 'Yh1nAC' pdk_acct = nbc_data.get('pdkAcct') or 'Yh1nAC'
fw_ssid = traverse_obj(nbc_data, ('video', 'fwSSID')) fw_ssid = traverse_obj(nbc_data, ('video', 'fwSSID'))
fw_network_id = traverse_obj(nbc_data, ('video', 'fwNetworkID'), default='382114')
video_data = self._parse_json(self._html_search_regex( video_data = self._search_json(
r'data-videos="([^"]*)"', webpage, 'video data', default='{}'), video_id) r'data-videos="\[', webpage, 'video data', video_id, default={}, transform_source=unescapeHTML)
video_data = variadic(video_data)[0] video_data.update(self._search_json(
video_data.update(self._parse_json(self._html_search_regex( r'data-meta="', webpage, 'metadata', video_id, default={}, transform_source=unescapeHTML))
r'data-meta="([^"]*)"', webpage, 'metadata', default='{}'), video_id)) if not video_data:
raise ExtractorError('No video metadata found in webpage', expected=True)
formats = [] info, formats, subtitles = {}, [], {}
is_live = int_or_none(video_data.get('mpx_is_livestream')) == 1
if video_data.get('mpx_is_livestream') == '1':
live = True
player_id = traverse_obj(
video_data, 'mpx_m3upid', ('video', 'meta', 'mpx_m3upid'), 'mpx_pid',
('video', 'meta', 'mpx_pid'), 'pid_streaming_web_medium')
query = { query = {
'mbr': 'true', 'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
'assetTypes': 'LegacyRelease', 'format': 'SMIL',
'fwsitesection': fw_ssid, 'fwsitesection': fw_ssid,
'fwNetworkID': fw_network_id, 'fwNetworkID': traverse_obj(nbc_data, ('video', 'fwNetworkID'), default='382114'),
'pprofile': 'ots_desktop_html', 'pprofile': 'ots_desktop_html',
'sensitive': 'false', 'sensitive': 'false',
'w': '1920', 'w': '1920',
'h': '1080', 'h': '1080',
'rnd': '1660303', 'mode': 'LIVE' if is_live else 'on-demand',
'mode': 'LIVE',
'format': 'SMIL',
'tracking': 'true',
'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
'vpaid': 'script', 'vpaid': 'script',
'schema': '2.0', 'schema': '2.0',
'SDK': 'PDK+6.1.3', 'sdk': 'PDK 6.1.3',
}
info = {
'title': f'{channel} livestream',
} }
if is_live:
player_id = traverse_obj(video_data, ((None, ('video', 'meta')), (
'mpx_m3upid', 'mpx_pid', 'pid_streaming_web_medium')), get_all=False)
info['title'] = f'{channel} livestream'
else: else:
live = False player_id = traverse_obj(video_data, (
player_id = traverse_obj( (None, ('video', 'meta')), ('pid_streaming_web_high', 'mpx_pid')), get_all=False)
video_data, ('video', 'meta', 'pid_streaming_web_high'), 'pid_streaming_web_high',
('video', 'meta', 'mpx_pid'), 'mpx_pid')
date_string = traverse_obj(video_data, 'date_string', 'date_gmt') date_string = traverse_obj(video_data, 'date_string', 'date_gmt')
if date_string: if date_string:
@ -692,63 +743,58 @@ def _real_extract(self, url):
r'datetime="([^"]+)"', date_string, 'date string', fatal=False) r'datetime="([^"]+)"', date_string, 'date string', fatal=False)
else: else:
date_string = traverse_obj( date_string = traverse_obj(
nbc_data, ('dataLayer', 'adobe', 'prop70'), ('dataLayer', 'adobe', 'eVar70'), nbc_data, ('dataLayer', 'adobe', ('prop70', 'eVar70', 'eVar59')), get_all=False)
('dataLayer', 'adobe', 'eVar59'))
video_url = traverse_obj(video_data, ('video', 'meta', 'mp4_url'), 'mp4_url') video_url = traverse_obj(video_data, ((None, ('video', 'meta')), 'mp4_url'), get_all=False)
if video_url: if video_url:
height = url_basename(video_url).split('-')[1].split('p')[0] height = self._search_regex(r'\d+-(\d+)p', url_basename(video_url), 'height', default=None)
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'ext': 'mp4', 'ext': 'mp4',
'width': int_or_none(self._RESOLUTIONS.get(height)), 'width': int_or_none(self._RESOLUTIONS.get(height)),
'height': int_or_none(height), 'height': int_or_none(height),
'format_id': f'http-{height}', 'format_id': 'http-mp4',
}) })
query = { info.update({
'mbr': 'true', 'title': video_data.get('title') or traverse_obj(nbc_data, (
'assetTypes': 'LegacyRelease', 'dataLayer', (None, 'adobe'), ('contenttitle', 'title', 'prop22')), get_all=False),
'fwsitesection': fw_ssid, 'description':
'fwNetworkID': fw_network_id, traverse_obj(video_data, 'summary', 'excerpt', 'video_hero_text')
'format': 'redirect', or clean_html(traverse_obj(nbc_data, ('dataLayer', 'summary'))),
'manifest': 'm3u', 'timestamp': unified_timestamp(date_string),
'Tracking': 'true', })
'Embedded': 'true',
'formats': 'MPEG4',
}
info = {
'title': video_data.get('title') or traverse_obj(
nbc_data, ('dataLayer', 'contenttitle'), ('dataLayer', 'title'),
('dataLayer', 'adobe', 'prop22'), ('dataLayer', 'id')),
'description': traverse_obj(video_data, 'summary', 'excerpt', 'video_hero_text'),
'upload_date': str_or_none(unified_strdate(date_string)),
'timestamp': int_or_none(unified_timestamp(date_string)),
}
if not player_id: smil = None
raise ExtractorError( if player_id and fw_ssid:
'No video player ID or livestream player ID found in webpage', expected=True) smil = self._download_xml(
headers = {'Origin': f'https://www.{channel}.com'}
manifest, urlh = self._download_webpage_handle(
f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id, f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
headers=headers, query=query, note='Downloading manifest') note='Downloading SMIL data', query=query, fatal=is_live)
if live: if smil:
manifest_url = self._search_regex(r'<video src="([^"]*)', manifest, 'manifest URL') manifest_url = xpath_attr(smil, './/{*}video', 'src', fatal=is_live)
else: subtitles = self._parse_smil_subtitles(smil, '*')
manifest_url = urlh.geturl() fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
live=is_live, errnote='No HLS formats found')
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
formats.extend(self._extract_m3u8_formats( if not formats:
manifest_url, video_id, 'mp4', headers=headers, m3u8_id='hls', self.raise_no_formats('No video content found in webpage', expected=True)
fatal=live, live=live, errnote='No HLS formats found')) elif is_live:
try:
self._request_webpage(
HEADRequest(formats[0]['url']), video_id, note='Checking live status')
except ExtractorError:
raise UserNotLive(video_id=channel)
return { return {
'id': str_or_none(video_id), 'id': video_id,
'channel': channel, 'channel': channel,
'uploader': str_or_none(nbc_data.get('on_air_name')), 'channel_id': nbc_data.get('callLetters'),
'uploader_id': str_or_none(nbc_data.get('callLetters')), 'uploader': nbc_data.get('on_air_name'),
'formats': formats, 'formats': formats,
'is_live': live, 'subtitles': subtitles,
'is_live': is_live,
**info, **info,
} }

View file

@ -1,6 +1,6 @@
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from ..utils import smuggle_url, traverse_obj from ..utils import smuggle_url, traverse_obj
@ -251,3 +251,31 @@ def _real_extract(self, url):
self.parse_playlist(playlist_data['response'], playlist_id), self.parse_playlist(playlist_data['response'], playlist_id),
traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')), traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
traverse_obj(playlist_data, ('response', 'webseries_info', 'title'))) traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
class NetverseSearchIE(SearchInfoExtractor):
    # Search extractor: handles "netsearchN:<query>" pseudo-URLs by paging
    # through the netverse.id search API.
    _SEARCH_KEY = 'netsearch'

    _TESTS = [{
        'url': 'netsearch10:tetangga',
        'info_dict': {
            'id': 'tetangga',
            'title': 'tetangga',
        },
        'playlist_count': 10,
    }]

    def _search_results(self, query):
        """Yield one URL result per search hit for *query*.

        Pages are requested starting at 1. Iteration stops when a page has no
        hits or when the page index reaches the 'lastpage' value reported by
        the API (the first truthy value seen is remembered and reused).
        """
        last_page = None
        i = 0
        while True:
            i += 1
            search_data = self._download_json(
                'https://api.netverse.id/search/elastic/search', query,
                query={'q': query, 'page': i}, note=f'Downloading page {i}')

            videos = traverse_obj(search_data, ('response', 'data', ...))
            yield from (
                self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE)
                for video in videos)

            # Only look up 'lastpage' while no truthy value has been recorded yet
            if not last_page:
                last_page = traverse_obj(search_data, ('response', 'lastpage'))
            if not videos or i >= (last_page or 0):
                return

View file

@ -11,6 +11,7 @@
int_or_none, int_or_none,
qualities, qualities,
smuggle_url, smuggle_url,
traverse_obj,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unsmuggle_url, unsmuggle_url,
@ -153,6 +154,26 @@ class OdnoklassnikiIE(InfoExtractor):
'title': 'Быковское крещение', 'title': 'Быковское крещение',
'duration': 3038.181, 'duration': 3038.181,
}, },
'skip': 'HTTP Error 400',
}, {
'note': 'subtitles',
'url': 'https://ok.ru/video/4249587550747',
'info_dict': {
'id': '4249587550747',
'ext': 'mp4',
'title': 'Small Country An African Childhood (2020) (1080p) +subtitle',
'uploader': 'Sunflower Movies',
'uploader_id': '595802161179',
'upload_date': '20220816',
'duration': 6728,
'age_limit': 0,
'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
'like_count': int,
'subtitles': dict,
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
'only_matching': True, 'only_matching': True,
@ -202,6 +223,7 @@ class OdnoklassnikiIE(InfoExtractor):
'like_count': 0, 'like_count': 0,
'duration': 10444, 'duration': 10444,
}, },
'skip': 'Site no longer embeds',
}] }]
@classmethod @classmethod
@ -294,6 +316,16 @@ def _extract_desktop(self, url):
like_count = int_or_none(metadata.get('likeCount')) like_count = int_or_none(metadata.get('likeCount'))
subtitles = {}
for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict):
sub_url = sub.get('url')
if not sub_url:
continue
subtitles.setdefault(sub.get('language') or 'en', []).append({
'url': sub_url,
'ext': 'vtt',
})
info = { info = {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -305,6 +337,7 @@ def _extract_desktop(self, url):
'like_count': like_count, 'like_count': like_count,
'age_limit': age_limit, 'age_limit': age_limit,
'start_time': start_time, 'start_time': start_time,
'subtitles': subtitles,
} }
# pladform # pladform

View file

@ -1,71 +1,128 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
ExtractorError,
get_element_by_class,
int_or_none,
merge_dicts,
url_or_none,
)
class PeekVidsIE(InfoExtractor): class PeekVidsBaseIE(InfoExtractor):
def _real_extract(self, url):
    """Extract a single video from a page matched by the subclass's _VALID_URL.

    The pattern must provide the named groups 'domain' and 'id'. The id taken
    from the URL is kept as display_id; the value of the <video> tag's data-id
    attribute becomes the returned id and is used to query the /v-alt/
    endpoint for the media sources.

    Raises ExtractorError (expected) when the page shows the rate-limit notice.
    """
    domain, video_id = self._match_valid_url(url).group('domain', 'id')
    webpage = self._download_webpage(url, video_id, expected_status=429)
    # HTTP 429 is accepted above so that the rate-limit page can be detected here
    if '>Rate Limit Exceeded' in webpage:
        raise ExtractorError(
            f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}',
            video_id=video_id, expected=True)

    page_title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

    # Right-hand side is evaluated first: the URL id is preserved as display_id
    display_id, video_id = video_id, self._search_regex(
        r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
    sources = self._download_json(
        f'https://www.{domain}/v-alt/{video_id}', video_id,
        note='Downloading list of source files')

    formats = []
    for key, value in sources.items():
        media_url = url_or_none(value)
        # Keys of the form "data-src<height>" carry the height; other keys are skipped
        height = media_url and self._search_regex(
            r'^data-src(\d{3,})$', key, 'height', default=None)
        if height:
            formats.append({
                'url': media_url,
                'format_id': height,
                'height': int_or_none(height),
            })

    if not formats:
        # Fallback: no key matched the pattern, so offer every value as a format
        formats = [{'url': source} for source in sources.values()]

    info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
    info.pop('url', None)

    # may not have found the thumbnail if it was in a list in the ld+json
    info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
    detail_html = (
        get_element_by_class('detail-video-block', webpage)
        or get_element_by_class('detail-block', webpage)
        or '')
    # Take the text of the detail block that precedes either the ld+json
    # description (followed by a tag) or the first <ul>
    description = self._html_search_regex(
        rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|<ul\b)',
        detail_html, 'description', default=None)
    info['description'] = description or None
    # Drop the last ","/"-" separated segment of the title
    trimmed_title = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or page_title)
    info['title'] = trimmed_title or self._generic_title(url)

    def cat_tags(name, html):
        # Whitespace-separated items listed after "<span>name:</span>" up to </li>
        listing = self._html_search_regex(
            rf'(?s)<span\b[^>]*>\s*{re.escape(name)}\s*:\s*</span>(.+?)</li>',
            html, name, default='')
        return [item for item in re.split(r'\s+', listing) if item]

    page_fields = {
        'id': video_id,
        'display_id': display_id,
        'age_limit': 18,
        'formats': formats,
        'categories': cat_tags('Categories', detail_html),
        'tags': cat_tags('Tags', detail_html),
        'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
    }
    return merge_dicts(page_fields, info)
class PeekVidsIE(PeekVidsBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:www\.)?peekvids\.com/ https?://(?:www\.)?(?P<domain>peekvids\.com)/
(?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
(?P<id>[^/?&#]*) (?P<id>[^/?&#]*)
''' '''
_TESTS = [{ _TESTS = [{
'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
'md5': 'a00940646c428e232407e3e62f0e8ef5', 'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
'info_dict': { 'info_dict': {
'id': 'BSyLMbN0YCd', 'id': '1262717',
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', 'display_id': 'BSyLMbN0YCd',
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com', 'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
'timestamp': 1642579329, 'timestamp': 1642579329,
'upload_date': '20220119', 'upload_date': '20220119',
'duration': 416, 'duration': 416,
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
'uploader': 'SEXYhub.com',
'categories': list,
'tags': list,
}, },
}] }]
_DOMAIN = 'www.peekvids.com'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
short_video_id = self._html_search_regex(r'<video [^>]*data-id="(.+?)"', webpage, 'short video ID')
srcs = self._download_json(
f'https://{self._DOMAIN}/v-alt/{short_video_id}', video_id,
note='Downloading list of source files')
formats = [{
'url': url,
'ext': 'mp4',
'format_id': name[8:],
} for name, url in srcs.items() if len(name) > 8 and name.startswith('data-src')]
if not formats:
formats = [{'url': url} for url in srcs.values()]
info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
info.update({
'id': video_id,
'age_limit': 18,
'formats': formats,
})
return info
class PlayVidsIE(PeekVidsIE): # XXX: Do not subclass from concrete IE class PlayVidsIE(PeekVidsBaseIE):
_VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|[^/]{2}/)?(?P<id>[^/?#]*)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>playvids\.com)/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', 'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
'md5': 'cd7dfd8a2e815a45402369c76e3c1825', 'md5': '2f12e50213dd65f142175da633c4564c',
'info_dict': { 'info_dict': {
'id': 'U3pBrYhsjXM', 'id': '1978030',
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', 'display_id': 'U3pBrYhsjXM',
'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp video in HD, uploaded by SEXYhub.com', 'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
'timestamp': 1640435839, 'timestamp': 1640435839,
'upload_date': '20211225', 'upload_date': '20211225',
'duration': 416, 'duration': 416,
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
'uploader': 'SEXYhub.com',
'categories': list,
'tags': list,
}, },
}, { }, {
'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp', 'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
@ -73,5 +130,62 @@ class PlayVidsIE(PeekVidsIE): # XXX: Do not subclass from concrete IE
}, { }, {
'url': 'https://www.playvids.com/embed/U3pBrYhsjXM', 'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
'md5': 'e783986e596cafbf46411a174ab42ba6',
'info_dict': {
'id': '762385',
'display_id': 'bKmGLe3IwjZ',
'ext': 'mp4',
'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
'timestamp': 1516958544,
'upload_date': '20180126',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 480,
'uploader': 'Brazzers',
'age_limit': 18,
'view_count': int,
'age_limit': 18,
'categories': list,
'tags': list,
},
}, {
'url': 'https://www.playvids.com/v/47iUho33toY',
'md5': 'b056b5049d34b648c1e86497cf4febce',
'info_dict': {
'id': '700621',
'display_id': '47iUho33toY',
'ext': 'mp4',
'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
'description': None,
'timestamp': 1507052209,
'upload_date': '20171003',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 332,
'uploader': 'Cacerenele',
'age_limit': 18,
'view_count': int,
'categories': list,
'tags': list,
},
}, {
'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
'info_dict': {
'id': '1523518',
'display_id': 'z3_7iwWCmqt',
'ext': 'mp4',
'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
'description': None,
'timestamp': 1607470323,
'upload_date': '20201208',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 593,
'uploader': 'yorours',
'age_limit': 18,
'view_count': int,
'categories': list,
'tags': list,
},
}] }]
_DOMAIN = 'www.playvids.com'

View file

@ -10,6 +10,7 @@
compat_urlparse compat_urlparse
) )
from ..utils import ( from ..utils import (
determine_ext,
extract_attributes, extract_attributes,
ExtractorError, ExtractorError,
InAdvancePagedList, InAdvancePagedList,
@ -17,6 +18,7 @@
js_to_json, js_to_json,
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
traverse_obj,
unified_timestamp, unified_timestamp,
unescapeHTML, unescapeHTML,
url_or_none, url_or_none,
@ -48,28 +50,11 @@ def _extract_webpage_player_entries(self, webpage, playlist_id, base_data):
yield entry yield entry
class PolskieRadioIE(PolskieRadioBaseExtractor): class PolskieRadioLegacyIE(PolskieRadioBaseExtractor):
_VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' # legacy sites
_TESTS = [{ # Old-style single broadcast. IE_NAME = 'polskieradio:legacy'
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/[Aa]rtykul/(?P<id>\d+)'
'info_dict': { _TESTS = [{
'id': '1587943',
'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
},
'playlist': [{
'md5': '2984ee6ce9046d91fc233bc1a864a09a',
'info_dict': {
'id': '1540576',
'ext': 'mp3',
'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
'timestamp': 1456594200,
'upload_date': '20160227',
'duration': 2364,
'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
},
}],
}, { # New-style single broadcast.
'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo', 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo',
'info_dict': { 'info_dict': {
'id': '2534482', 'id': '2534482',
@ -96,16 +81,6 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'Pogłos 29 października godz. 23:01', 'title': 'Pogłos 29 października godz. 23:01',
}, },
}, {
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
'only_matching': True,
}, {
'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
'only_matching': True,
}, {
# with mp4 video
'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
'only_matching': True,
}, { }, {
'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci',
'only_matching': True, 'only_matching': True,
@ -114,7 +89,9 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id) webpage, urlh = self._download_webpage_handle(url, playlist_id)
if PolskieRadioIE.suitable(urlh.url):
return self.url_result(urlh.url, PolskieRadioIE, playlist_id)
content = self._search_regex( content = self._search_regex(
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
@ -153,23 +130,160 @@ def _real_extract(self, url):
return self.playlist_result(entries, playlist_id, title, description) return self.playlist_result(entries, playlist_id, title, description)
class PolskieRadioCategoryIE(InfoExtractor): class PolskieRadioIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)' # new next.js sites, excluding radiokierowcow.pl
_VALID_URL = r'https?://(?:[^/]+\.)?polskieradio(?:24)?\.pl/artykul/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', 'url': 'https://jedynka.polskieradio.pl/artykul/1587943',
'info_dict': {
'id': '1587943',
'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
},
'playlist': [{
'md5': '2984ee6ce9046d91fc233bc1a864a09a',
'info_dict': {
'id': '7a85d429-5356-4def-a347-925e4ae7406b',
'ext': 'mp3',
'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
},
}],
}, {
'url': 'https://trojka.polskieradio.pl/artykul/1632955',
'only_matching': True,
}, {
# with mp4 video
'url': 'https://trojka.polskieradio.pl/artykul/1634903',
'only_matching': True,
}, {
'url': 'https://jedynka.polskieradio.pl/artykul/3042436,Polityka-wschodnia-ojca-i-syna-Wladyslawa-Lokietka-i-Kazimierza-Wielkiego',
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract the audio attachments of an article page as a playlist.

    Article metadata is read from the page's Next.js data under
    props.pageProps.data.articleData. Every attachment whose 'fileType' is
    'Audio' becomes one playlist entry.

    Raises ExtractorError when the article data cannot be located.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)

    article_data = traverse_obj(
        self._search_nextjs_data(webpage, playlist_id), ('props', 'pageProps', 'data', 'articleData'))
    if not article_data:
        # traverse_obj yields None when the path is absent; fail with a clear
        # message instead of a TypeError on the subscript below
        raise ExtractorError('Unable to find article data in webpage', video_id=playlist_id)

    # The title is mandatory, hence the hard subscript
    title = strip_or_none(article_data['title'])

    description = strip_or_none(article_data.get('lead'))

    entries = [{
        'url': entry['file'],
        'ext': determine_ext(entry.get('fileName')),
        # The entry id is the UUID embedded in the file URL
        'id': self._search_regex(
            r'([a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12})', entry['file'], 'entry id'),
        'title': strip_or_none(entry.get('description')) or title,
        # .get(): an attachment without 'fileType' is skipped rather than
        # aborting the whole playlist with a KeyError
    } for entry in article_data.get('attachments') or () if entry.get('fileType') in ('Audio', )]

    return self.playlist_result(entries, playlist_id, title, description)
class PolskieRadioAuditionIE(InfoExtractor):
# new next.js sites
IE_NAME = 'polskieradio:audition'
_VALID_URL = r'https?://(?:[^/]+\.)?polskieradio\.pl/audycj[ae]/(?P<id>\d+)'
_TESTS = [{
# articles, PR1
'url': 'https://jedynka.polskieradio.pl/audycje/5102',
'info_dict': { 'info_dict': {
'id': '5102', 'id': '5102',
'title': 'HISTORIA ŻYWA', 'title': 'Historia żywa',
'thumbnail': r're:https://static\.prsa\.pl/images/.+',
}, },
'playlist_mincount': 38, 'playlist_mincount': 38,
}, { }, {
'url': 'http://www.polskieradio.pl/7/4807', # episodes, PR1
'url': 'https://jedynka.polskieradio.pl/audycje/5769',
'info_dict': { 'info_dict': {
'id': '4807', 'id': '5769',
'title': 'Vademecum 1050. rocznicy Chrztu Polski' 'title': 'AgroFakty',
'thumbnail': r're:https://static\.prsa\.pl/images/.+',
}, },
'playlist_mincount': 5 'playlist_mincount': 269,
}, { }, {
# both episodes and articles, PR3
'url': 'https://trojka.polskieradio.pl/audycja/8906',
'info_dict': {
'id': '8906',
'title': 'Trójka budzi',
'thumbnail': r're:https://static\.prsa\.pl/images/.+',
},
'playlist_mincount': 722,
}]
def _call_lp3(self, path, query, video_id, note):
    """Download and return JSON from *path* on lp3test.polskieradio.pl.

    *query* is sent as the query string and *note* is the progress message;
    the fixed 'x-api-key' header is attached to every request.
    """
    api_url = f'https://lp3test.polskieradio.pl/{path}'
    api_headers = {'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'}
    return self._download_json(api_url, video_id, note, query=query, headers=api_headers)
def _entries(self, playlist_id, has_episodes, has_articles):
for i in itertools.count(1) if has_episodes else []:
page = self._call_lp3(
'AudioArticle/GetListByCategoryId', {
'categoryId': playlist_id,
'PageSize': 10,
'skip': i,
'format': 400,
}, playlist_id, f'Downloading episode list page {i}')
if not traverse_obj(page, 'data'):
break
for episode in page['data']:
yield {
'id': str(episode['id']),
'url': episode['file'],
'title': episode.get('title'),
'duration': int_or_none(episode.get('duration')),
'timestamp': parse_iso8601(episode.get('datePublic')),
}
for i in itertools.count(1) if has_articles else []:
page = self._call_lp3(
'Article/GetListByCategoryId', {
'categoryId': playlist_id,
'PageSize': 9,
'skip': i,
'format': 400,
}, playlist_id, f'Downloading article list page {i}')
if not traverse_obj(page, 'data'):
break
for article in page['data']:
yield {
'_type': 'url_transparent',
'ie_key': PolskieRadioIE.ie_key(),
'id': str(article['id']),
'url': article['url'],
'title': article.get('shortTitle'),
'description': traverse_obj(article, ('description', 'lead')),
'timestamp': parse_iso8601(article.get('datePublic')),
}
def _real_extract(self, url):
    """Build the playlist for an audition page.

    The Next.js page props decide which listings are queried: episodes when
    'episodes' or 'audios' is non-empty, articles when 'articles' is
    non-empty. Title, description and thumbnail come from 'details'.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)
    nextjs_data = self._search_nextjs_data(webpage, playlist_id)
    # First match wins: pageProps['data'] if present, otherwise pageProps itself
    page_props = traverse_obj(
        nextjs_data, ('props', 'pageProps', ('data', None)), get_all=False)

    has_episodes = bool(traverse_obj(page_props, 'episodes', 'audios'))
    has_articles = bool(traverse_obj(page_props, 'articles'))
    entries = self._entries(playlist_id, has_episodes, has_articles)

    playlist_title = traverse_obj(page_props, ('details', 'name'))
    playlist_description = traverse_obj(page_props, ('details', 'description', 'lead'))
    playlist_thumbnail = traverse_obj(page_props, ('details', 'photo'))

    return self.playlist_result(
        entries, playlist_id,
        title=playlist_title,
        description=playlist_description,
        thumbnail=playlist_thumbnail)
class PolskieRadioCategoryIE(InfoExtractor):
# legacy sites
IE_NAME = 'polskieradio:category'
_VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source',
'only_matching': True 'only_matching': True
}, { }, {
@ -186,9 +300,6 @@ class PolskieRadioCategoryIE(InfoExtractor):
'title': 'Muzyka', 'title': 'Muzyka',
}, },
'playlist_mincount': 61 'playlist_mincount': 61
}, {
'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA',
'only_matching': True,
}, { }, {
'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka',
'only_matching': True, 'only_matching': True,
@ -196,7 +307,7 @@ class PolskieRadioCategoryIE(InfoExtractor):
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) return False if PolskieRadioLegacyIE.suitable(url) else super().suitable(url)
def _entries(self, url, page, category_id): def _entries(self, url, page, category_id):
content = page content = page
@ -209,7 +320,7 @@ def _entries(self, url, page, category_id):
if not href: if not href:
continue continue
yield self.url_result( yield self.url_result(
compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), compat_urlparse.urljoin(url, href), PolskieRadioLegacyIE,
entry_id, entry.get('title')) entry_id, entry.get('title'))
mobj = re.search( mobj = re.search(
r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1',
@ -222,7 +333,9 @@ def _entries(self, url, page, category_id):
def _real_extract(self, url): def _real_extract(self, url):
category_id = self._match_id(url) category_id = self._match_id(url)
webpage = self._download_webpage(url, category_id) webpage, urlh = self._download_webpage_handle(url, category_id)
if PolskieRadioAuditionIE.suitable(urlh.url):
return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id)
title = self._html_search_regex( title = self._html_search_regex(
r'<title>([^<]+) - [^<]+ - [^<]+</title>', r'<title>([^<]+) - [^<]+ - [^<]+</title>',
webpage, 'title', fatal=False) webpage, 'title', fatal=False)
@ -358,7 +471,7 @@ def get_page(page_num):
'entries': InAdvancePagedList( 'entries': InAdvancePagedList(
get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE),
'id': str(data['id']), 'id': str(data['id']),
'title': data['title'], 'title': data.get('title'),
'description': data.get('description'), 'description': data.get('description'),
'uploader': data.get('announcer'), 'uploader': data.get('announcer'),
} }
@ -374,6 +487,10 @@ class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor):
'ext': 'mp3', 'ext': 'mp3',
'title': 'Theresa May rezygnuje. Co dalej z brexitem?', 'title': 'Theresa May rezygnuje. Co dalej z brexitem?',
'description': 'md5:e41c409a29d022b70ef0faa61dbded60', 'description': 'md5:e41c409a29d022b70ef0faa61dbded60',
'episode': 'Theresa May rezygnuje. Co dalej z brexitem?',
'duration': 2893,
'thumbnail': 'https://static.prsa.pl/images/58649376-c8a0-4ba2-a714-78b383285f5f.jpg',
'series': 'Raport o stanie świata',
}, },
}] }]

View file

@ -32,6 +32,7 @@ class RedditIE(InfoExtractor):
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 0, 'age_limit': 0,
'channel_id': 'videos',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -55,6 +56,7 @@ class RedditIE(InfoExtractor):
'dislike_count': int, 'dislike_count': int,
'comment_count': int, 'comment_count': int,
'age_limit': 0, 'age_limit': 0,
'channel_id': 'aww',
}, },
}, { }, {
# videos embedded in reddit text post # videos embedded in reddit text post
@ -165,6 +167,7 @@ def add_thumbnail(src):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': float_or_none(data.get('created_utc')), 'timestamp': float_or_none(data.get('created_utc')),
'uploader': data.get('author'), 'uploader': data.get('author'),
'channel_id': data.get('subreddit'),
'like_count': int_or_none(data.get('ups')), 'like_count': int_or_none(data.get('ups')),
'dislike_count': int_or_none(data.get('downs')), 'dislike_count': int_or_none(data.get('downs')),
'comment_count': int_or_none(data.get('num_comments')), 'comment_count': int_or_none(data.get('num_comments')),

View file

@ -1,8 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import extract_attributes, int_or_none, remove_start, traverse_obj
int_or_none,
remove_start,
)
class RozhlasIE(InfoExtractor): class RozhlasIE(InfoExtractor):
@ -45,3 +42,138 @@ def _real_extract(self, url):
'duration': duration, 'duration': duration,
'vcodec': 'none', 'vcodec': 'none',
} }
class RozhlasVltavaIE(InfoExtractor):
    """Playlist extractor for pages that embed the ``mujRozhlasPlayer`` widget.

    Matches URLs on any ``*.rozhlas.cz`` subdomain and on ``english.radio.cz``
    whose path ends in ``-<numeric id>``.
    """

    _VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
        'md5': 'ba2fdbc1242fc16771c7695d271ec355',
        'info_dict': {
            'id': '8891337',
            'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
        },
        'playlist_count': 1,
        'playlist': [{
            'md5': 'ba2fdbc1242fc16771c7695d271ec355',
            'info_dict': {
                'id': '10520988',
                'ext': 'mp3',
                'title': 'Papej masíčko! Porcujeme a bilancujeme filmy a seriály, které to letos zabily',
                'description': 'md5:1c6d29fb9564e1f17fc1bb83ae7da0bc',
                'duration': 1574,
                'artist': 'Aleš Stuchlý',
                'channel_id': 'radio-wave',
            },
        }]
    }, {
        'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
        'info_dict': {
            'id': '8554744',
            'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
        },
        'playlist_count': 5,
        'playlist': [{
            'md5': '93d4109cf8f40523699ae9c1d4600bdd',
            'info_dict': {
                'id': '9890713',
                'ext': 'mp3',
                'title': 'Neklid #1',
                'description': '1. díl: Neklid: 1. díl',
                'duration': 1025,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #1',
                'chapter_number': 1,
            },
        }, {
            'md5': 'e9763235be4a6dcf94bc8a5bac1ca126',
            'info_dict': {
                'id': '9890716',
                'ext': 'mp3',
                'title': 'Neklid #2',
                'description': '2. díl: Neklid: 2. díl',
                'duration': 768,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #2',
                'chapter_number': 2,
            },
        }, {
            'md5': '00b642ea94b78cc949ac84da09f87895',
            'info_dict': {
                'id': '9890722',
                'ext': 'mp3',
                'title': 'Neklid #3',
                'description': '3. díl: Neklid: 3. díl',
                'duration': 607,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #3',
                'chapter_number': 3,
            },
        }, {
            'md5': 'faef97b1b49da7df874740f118c19dea',
            'info_dict': {
                'id': '9890728',
                'ext': 'mp3',
                'title': 'Neklid #4',
                'description': '4. díl: Neklid: 4. díl',
                'duration': 621,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #4',
                'chapter_number': 4,
            },
        }, {
            'md5': '6e729fa39b647325b868d419c76f3efa',
            'info_dict': {
                'id': '9890734',
                'ext': 'mp3',
                'title': 'Neklid #5',
                'description': '5. díl: Neklid: 5. díl',
                'duration': 908,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #5',
                'chapter_number': 5,
            },
        }]
    }]

    def _extract_video(self, entry):
        """Build the info dict for one item of the player's ``playlist`` array.

        ``entry['meta']['ga']['contentId']`` and ``entry['audioLinks']`` (each
        with a ``url``) are required and raise if missing; every other field
        is optional and comes back as ``None`` when absent.
        """
        chapter_number = int_or_none(traverse_obj(entry, ('meta', 'ga', 'contentSerialPart')))

        return {
            'id': entry['meta']['ga']['contentId'],
            'title': traverse_obj(entry, ('meta', 'ga', 'contentName')),
            'description': entry.get('title'),
            'duration': entry.get('duration'),
            'artist': traverse_obj(entry, ('meta', 'ga', 'contentAuthor')),
            'channel_id': traverse_obj(entry, ('meta', 'ga', 'contentCreator')),
            # The short name is only reported as a chapter title for serial parts
            'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
            'chapter_number': chapter_number,
            'formats': [{
                'url': audio_link['url'],
                # `variant` doubles as extension, format id and audio codec
                'ext': audio_link.get('variant'),
                'format_id': audio_link.get('variant'),
                'abr': audio_link.get('bitrate'),
                'acodec': audio_link.get('variant'),
                'vcodec': 'none',
            } for audio_link in entry['audioLinks']],
        }

    def _real_extract(self, url):
        """Return a playlist built from the JSON in the player's ``data-player`` attribute."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # FIXME: Use get_element_text_and_html_by_tag when it accepts less strict html
        data = self._parse_json(extract_attributes(self._search_regex(
            r'(<div class="mujRozhlasPlayer" data-player=\'[^\']+\'>)',
            webpage, 'player'))['data-player'], video_id)['data']

        # `embedId` can decode as a JSON number, whereas ids are expected to be
        # strings; coerce it and fall back to the id taken from the URL.
        embed_id = data.get('embedId')

        return {
            '_type': 'playlist',
            'id': str(embed_id) if embed_id is not None else video_id,
            'title': traverse_obj(data, ('series', 'title')),
            'entries': map(self._extract_video, data['playlist']),
        }

View file

@ -1,16 +1,24 @@
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
int_or_none,
parse_qs,
smuggle_url, smuggle_url,
traverse_obj, traverse_obj,
unified_timestamp, unified_timestamp,
update_url_query,
url_or_none, url_or_none,
xpath_text,
) )
class SlidesLiveIE(InfoExtractor): class SlidesLiveIE(InfoExtractor):
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)' _VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
# service_name = yoda # service_name = yoda, only XML slides info
'url': 'https://slideslive.com/38902413/gcc-ia16-backend', 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
'info_dict': { 'info_dict': {
'id': '38902413', 'id': '38902413',
@ -19,12 +27,15 @@ class SlidesLiveIE(InfoExtractor):
'timestamp': 1648189972, 'timestamp': 1648189972,
'upload_date': '20220325', 'upload_date': '20220325',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:42',
'chapters': 'count:41',
'duration': 1638,
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, { }, {
# service_name = yoda # service_name = yoda, /v7/ slides
'url': 'https://slideslive.com/38935785', 'url': 'https://slideslive.com/38935785',
'info_dict': { 'info_dict': {
'id': '38935785', 'id': '38935785',
@ -32,13 +43,16 @@ class SlidesLiveIE(InfoExtractor):
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges', 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
'upload_date': '20211115', 'upload_date': '20211115',
'timestamp': 1636996003, 'timestamp': 1636996003,
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:640',
'chapters': 'count:639',
'duration': 9832,
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, { }, {
# service_name = yoda # service_name = yoda, /v1/ slides
'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics', 'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
'info_dict': { 'info_dict': {
'id': '38973182', 'id': '38973182',
@ -47,12 +61,15 @@ class SlidesLiveIE(InfoExtractor):
'upload_date': '20220201', 'upload_date': '20220201',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1643728135, 'timestamp': 1643728135,
'thumbnails': 'count:3',
'chapters': 'count:2',
'duration': 5889,
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, { }, {
# service_name = youtube # service_name = youtube, only XML slides info
'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost', 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
'md5': '8a79b5e3d700837f40bd2afca3c8fa01', 'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
'info_dict': { 'info_dict': {
@ -76,26 +93,278 @@ class SlidesLiveIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'channel_follower_count': int, 'channel_follower_count': int,
'age_limit': 0, 'age_limit': 0,
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
'thumbnails': 'count:169',
'playable_in_embed': True, 'playable_in_embed': True,
'availability': 'unlisted', 'availability': 'unlisted',
'tags': [], 'tags': [],
'categories': ['People & Blogs'], 'categories': ['People & Blogs'],
'chapters': 'count:168',
}, },
}, { }, {
# service_name = youtube # embed-only presentation, only XML slides info
'url': 'https://slideslive.com/embed/presentation/38925850',
'info_dict': {
'id': '38925850',
'ext': 'mp4',
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
'timestamp': 1629671508,
'upload_date': '20210822',
'chapters': 'count:7',
'duration': 326,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# embed-only presentation, only JSON slides info, /v5/ slides (.png)
'url': 'https://slideslive.com/38979920/',
'info_dict': {
'id': '38979920',
'ext': 'mp4',
'title': 'MoReL: Multi-omics Relational Learning',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:7',
'timestamp': 1654714970,
'upload_date': '20220608',
'chapters': 'count:6',
'duration': 171,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# /v2/ slides (.jpg)
'url': 'https://slideslive.com/38954074',
'info_dict': {
'id': '38954074',
'ext': 'mp4',
'title': 'Decentralized Attribution of Generative Models',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:16',
'timestamp': 1622806321,
'upload_date': '20210604',
'chapters': 'count:15',
'duration': 306,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# /v4/ slides (.png)
'url': 'https://slideslive.com/38979570/',
'info_dict': {
'id': '38979570',
'ext': 'mp4',
'title': 'Efficient Active Search for Combinatorial Optimization Problems',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:9',
'timestamp': 1654714896,
'upload_date': '20220608',
'chapters': 'count:8',
'duration': 295,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# /v10/ slides
'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
'info_dict': {
'id': '38979880',
'ext': 'mp4',
'title': 'The Representation Power of Neural Networks',
'timestamp': 1654714962,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:22',
'upload_date': '20220608',
'chapters': 'count:21',
'duration': 294,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# /v7/ slides, 2 video slides
'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
'playlist_count': 3,
'info_dict': {
'id': '38979682-playlist',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
},
'playlist': [{
'info_dict': {
'id': '38979682',
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
'timestamp': 1654714920,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:30',
'upload_date': '20220608',
'chapters': 'count:31',
'duration': 272,
},
}, {
'info_dict': {
'id': '38979682-021',
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
'duration': 3,
'timestamp': 1654714920,
'upload_date': '20220608',
},
}, {
'info_dict': {
'id': '38979682-024',
'ext': 'mp4',
'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
'duration': 4,
'timestamp': 1654714920,
'upload_date': '20220608',
},
}],
'params': {
'skip_download': 'm3u8',
},
}, {
# /v6/ slides, 1 video slide, edit.videoken.com embed
'url': 'https://slideslive.com/38979481/',
'playlist_count': 2,
'info_dict': {
'id': '38979481-playlist',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
},
'playlist': [{
'info_dict': {
'id': '38979481',
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
'timestamp': 1654714877,
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:43',
'upload_date': '20220608',
'chapters': 'count:43',
'duration': 315,
},
}, {
'info_dict': {
'id': '38979481-013',
'ext': 'mp4',
'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
'duration': 3,
'timestamp': 1654714877,
'upload_date': '20220608',
},
}],
'params': {
'skip_download': 'm3u8',
},
}, {
# /v3/ slides, .jpg and .png, service_name = youtube
'url': 'https://slideslive.com/embed/38932460/',
'info_dict': {
'id': 'RTPdrgkyTiE',
'display_id': '38932460',
'ext': 'mp4',
'title': 'Active Learning for Hierarchical Multi-Label Classification',
'description': 'Watch full version of this video at https://slideslive.com/38932460.',
'channel': 'SlidesLive Videos - A',
'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
'upload_date': '20200903',
'timestamp': 1602599092,
'duration': 942,
'age_limit': 0,
'live_status': 'not_live',
'playable_in_embed': True,
'availability': 'unlisted',
'categories': ['People & Blogs'],
'tags': [],
'channel_follower_count': int,
'like_count': int,
'view_count': int,
'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)',
'thumbnails': 'count:21',
'chapters': 'count:20',
},
'params': {
'skip_download': 'm3u8',
},
}, {
# /v3/ slides, .png only, service_name = yoda
'url': 'https://slideslive.com/38983994',
'info_dict': {
'id': '38983994',
'ext': 'mp4',
'title': 'Zero-Shot AutoML with Pretrained Models',
'timestamp': 1662384834,
'upload_date': '20220905',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'thumbnails': 'count:23',
'chapters': 'count:22',
'duration': 295,
},
'params': {
'skip_download': 'm3u8',
},
}, {
# service_name = yoda
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True, 'only_matching': True,
}, { }, {
# service_name = url # dead link, service_name = url
'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
'only_matching': True, 'only_matching': True,
}, { }, {
# service_name = vimeo # dead link, service_name = vimeo
'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# only XML slides info
'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
'info_dict': {
'id': '38925850',
'ext': 'mp4',
'title': 'Towards a Deep Network Architecture for Structured Smoothness',
'thumbnail': r're:^https?://.*\.jpg',
'thumbnails': 'count:8',
'timestamp': 1629671508,
'upload_date': '20210822',
'chapters': 'count:7',
'duration': 326,
},
'params': {
'skip_download': 'm3u8',
},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield one embed URL per ``new SlidesLiveEmbed(...)`` call found in ``webpage``.

    Each yielded URL carries the embedding page's address and origin as the
    ``embed_parent_url`` and ``embed_container_origin`` query parameters.
    """
    # Reference: https://slideslive.com/embed_presentation.js
    embed_call_re = r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']'
    for presentation_id in re.findall(embed_call_re, webpage):
        parent = urllib.parse.urlparse(url)
        embed_query = {
            'embed_parent_url': url,
            'embed_container_origin': f'{parent.scheme}://{parent.netloc}',
        }
        yield update_url_query(
            f'https://slideslive.com/embed/presentation/{presentation_id}', embed_query)
def _download_embed_webpage_handle(self, video_id, headers):
    """Download the embed page for ``video_id`` and return ``(webpage, urlh)``.

    The ``Referer`` and ``Origin`` entries of ``headers`` are sent as HTTP
    headers and also mirrored into the ``embed_parent_url`` and
    ``embed_container_origin`` query parameters.
    """
    embed_query = traverse_obj(headers, {
        'embed_parent_url': 'Referer',
        'embed_container_origin': 'Origin',
    })
    embed_url = f'https://slideslive.com/embed/presentation/{video_id}'
    return self._download_webpage_handle(
        embed_url, video_id, headers=headers, query=embed_query)
def _extract_custom_m3u8_info(self, m3u8_data): def _extract_custom_m3u8_info(self, m3u8_data):
m3u8_dict = {} m3u8_dict = {}
@ -108,6 +377,8 @@ def _extract_custom_m3u8_info(self, m3u8_data):
'VOD-VIDEO-ID': 'service_id', 'VOD-VIDEO-ID': 'service_id',
'VOD-VIDEO-SERVERS': 'video_servers', 'VOD-VIDEO-SERVERS': 'video_servers',
'VOD-SUBTITLES': 'subtitles', 'VOD-SUBTITLES': 'subtitles',
'VOD-SLIDES-JSON-URL': 'slides_json_url',
'VOD-SLIDES-XML-URL': 'slides_xml_url',
} }
for line in m3u8_data.splitlines(): for line in m3u8_data.splitlines():
@ -126,9 +397,46 @@ def _extract_custom_m3u8_info(self, m3u8_data):
return m3u8_dict return m3u8_dict
def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
    """Collect HLS and DASH formats for ``path`` on ``cdn_hostname``.

    Returns ``(formats, duration)``. The duration is read from the first HLS
    format's manifest, or from the DASH manifest when HLS gave none; it stays
    ``None`` when ``skip_duration`` is set or neither manifest yields formats.
    """
    base_url = f'https://{cdn_hostname}/{path}'
    mpd_url = f'{base_url}/master.mpd'
    duration = None

    hls_formats = self._extract_m3u8_formats(
        f'{base_url}/master.m3u8',
        video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
    if hls_formats and not skip_duration:
        duration = self._extract_m3u8_vod_duration(
            hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')

    dash_formats = self._extract_mpd_formats(
        mpd_url, video_id, mpd_id='dash', fatal=False)
    # DASH is only consulted for the duration when HLS did not provide one
    if dash_formats and not (duration or skip_duration):
        duration = self._extract_mpd_vod_duration(
            mpd_url, video_id,
            note='Extracting duration from DASH manifest')

    return [*(hls_formats or []), *(dash_formats or [])], duration
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage, urlh = self._download_embed_webpage_handle(
video_id, headers=traverse_obj(parse_qs(url), {
'Referer': ('embed_parent_url', -1),
'Origin': ('embed_container_origin', -1)}))
redirect_url = urlh.geturl()
if 'domain_not_allowed' in redirect_url:
domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
if not domain:
raise ExtractorError(
'This is an embed-only presentation. Try passing --referer', expected=True)
webpage, _ = self._download_embed_webpage_handle(video_id, headers={
'Referer': f'https://{domain}/',
'Origin': f'https://{domain}',
})
player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token') player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
player_data = self._download_webpage( player_data = self._download_webpage(
f'https://ben.slideslive.com/player/{video_id}', video_id, f'https://ben.slideslive.com/player/{video_id}', video_id,
@ -139,6 +447,48 @@ def _real_extract(self, url):
assert service_name in ('url', 'yoda', 'vimeo', 'youtube') assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
service_id = player_info['service_id'] service_id = player_info['service_id']
slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
slides, slides_info = {}, []
if player_info.get('slides_json_url'):
slides = self._download_json(
player_info['slides_json_url'], video_id, fatal=False,
note='Downloading slides JSON', errnote=False) or {}
slide_ext_default = '.png'
slide_quality = traverse_obj(slides, ('slide_qualities', 0))
if slide_quality:
slide_ext_default = '.jpg'
slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
slides_info.append((
slide_id, traverse_obj(slide, ('image', 'name')),
traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
int_or_none(slide.get('time'), scale=1000)))
if not slides and player_info.get('slides_xml_url'):
slides = self._download_xml(
player_info['slides_xml_url'], video_id, fatal=False,
note='Downloading slides XML', errnote='Failed to download slides info')
slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
for slide_id, slide in enumerate(slides.findall('./slide') if slides else [], 1):
slides_info.append((
slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
int_or_none(xpath_text(slide, './timeSec', 'time'))))
chapters, thumbnails = [], []
if url_or_none(player_info.get('thumbnail')):
thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
for slide_id, slide_path, slide_ext, start_time in slides_info:
if slide_path:
thumbnails.append({
'id': f'{slide_id:03d}',
'url': slide_url_template % (video_id, slide_path, slide_ext),
})
chapters.append({
'title': f'Slide {slide_id:03d}',
'start_time': start_time,
})
subtitles = {} subtitles = {}
for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict): for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
webvtt_url = url_or_none(sub.get('webvtt_url')) webvtt_url = url_or_none(sub.get('webvtt_url'))
@ -154,23 +504,18 @@ def _real_extract(self, url):
'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''), 'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
'timestamp': unified_timestamp(player_info.get('timestamp')), 'timestamp': unified_timestamp(player_info.get('timestamp')),
'is_live': player_info.get('playlist_type') != 'vod', 'is_live': player_info.get('playlist_type') != 'vod',
'thumbnail': url_or_none(player_info.get('thumbnail')), 'thumbnails': thumbnails,
'chapters': chapters,
'subtitles': subtitles, 'subtitles': subtitles,
} }
if service_name in ('url', 'yoda'):
if service_name == 'url': if service_name == 'url':
info['url'] = service_id info['url'] = service_id
else: elif service_name == 'yoda':
cdn_hostname = player_info['video_servers'][0] formats, duration = self._extract_formats_and_duration(
formats = [] player_info['video_servers'][0], service_id, video_id)
formats.extend(self._extract_m3u8_formats(
f'https://{cdn_hostname}/{service_id}/master.m3u8',
video_id, 'mp4', m3u8_id='hls', fatal=False, live=True))
formats.extend(self._extract_mpd_formats(
f'https://{cdn_hostname}/{service_id}/master.mpd',
video_id, mpd_id='dash', fatal=False))
info.update({ info.update({
'duration': duration,
'formats': formats, 'formats': formats,
}) })
else: else:
@ -185,4 +530,38 @@ def _real_extract(self, url):
f'https://player.vimeo.com/video/{service_id}', f'https://player.vimeo.com/video/{service_id}',
{'http_headers': {'Referer': url}}) {'http_headers': {'Referer': url}})
video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
if not video_slides:
return info return info
def entries():
yield info
service_data = self._download_json(
f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
video_id, fatal=False, query={
'player_token': player_token,
'videos': ','.join(video_slides),
}, note='Downloading video slides info', errnote='Failed to download video slides info') or {}
for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
if not traverse_obj(slide, ('video', 'service')) == 'yoda':
continue
video_path = traverse_obj(slide, ('video', 'id'))
cdn_hostname = traverse_obj(service_data, (
video_path, 'video_servers', ...), get_all=False)
if not cdn_hostname or not video_path:
continue
formats, _ = self._extract_formats_and_duration(
cdn_hostname, video_path, video_id, skip_duration=True)
if not formats:
continue
yield {
'id': f'{video_id}-{slide_id:03d}',
'title': f'{info["title"]} - Slide {slide_id:03d}',
'timestamp': info['timestamp'],
'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
'formats': formats,
}
return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])

View file

@ -782,6 +782,27 @@ def _real_extract(self, url):
'%s (%s)' % (user['username'], resource.capitalize())) '%s (%s)' % (user['username'], resource.capitalize()))
class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
    """Playlist extractor for numeric ``api.soundcloud.com/users/<id>`` URLs."""

    _VALID_URL = r'https?://api\.soundcloud\.com/users/(?P<id>\d+)'
    IE_NAME = 'soundcloud:user:permalink'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/users/30909869',
        'info_dict': {
            'id': '30909869',
            'title': 'neilcic',
        },
        'playlist_mincount': 23,
    }]

    def _real_extract(self, url):
        """Resolve the user behind ``url`` and return their stream as a playlist."""
        numeric_id = self._match_id(url)
        user_info = self._download_json(
            self._resolv_url(url), numeric_id, 'Downloading user info', headers=self._HEADERS)

        # The id from the resolved user record (not the one in the URL) names the playlist
        resolved_id = user_info['id']
        stream_endpoint = f'{self._API_V2_BASE}stream/users/{resolved_id}'
        return self._extract_playlist(
            stream_endpoint, str(resolved_id), user_info.get('username'))
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
IE_NAME = 'soundcloud:trackstation' IE_NAME = 'soundcloud:trackstation'

View file

@ -177,7 +177,6 @@ class SpankBangPlaylistIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage( webpage = self._download_webpage(
url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
@ -186,11 +185,11 @@ def _real_extract(self, url):
urljoin(url, mobj.group('path')), urljoin(url, mobj.group('path')),
ie=SpankBangIE.ie_key(), video_id=mobj.group('id')) ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
for mobj in re.finditer( for mobj in re.finditer(
r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1' r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/[^"\'](?:(?!\1).)*)\1',
% re.escape(display_id), webpage)] webpage)]
title = self._html_search_regex( title = self._html_search_regex(
r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title', r'<em>([^<]+)</em>\s+playlist\s*<', webpage, 'playlist title',
fatal=False) fatal=False)
return self.playlist_result(entries, playlist_id, title) return self.playlist_result(entries, playlist_id, title)

View file

@ -73,6 +73,8 @@ def _real_extract(self, url):
}) })
programme = result.get('programme') or {} programme = result.get('programme') or {}
if programme.get('drmEnabled'):
self.report_drm(video_id)
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

View file

@ -32,7 +32,7 @@ def _get_ckey(self, video_id, url, guid):
padding_mode='whitespace').hex().upper() padding_mode='whitespace').hex().upper()
def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality): def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality):
guid = ''.join([random.choice(string.digits + string.ascii_lowercase) for _ in range(16)]) guid = ''.join(random.choices(string.digits + string.ascii_lowercase, k=16))
ckey = self._get_ckey(video_id, video_url, guid) ckey = self._get_ckey(video_id, video_url, guid)
query = { query = {
'vid': video_id, 'vid': video_id,
@ -55,7 +55,7 @@ def _get_video_api_response(self, video_url, video_id, series_id, subtitle_forma
'platform': self._PLATFORM, 'platform': self._PLATFORM,
# For VQQ # For VQQ
'guid': guid, 'guid': guid,
'flowid': ''.join(random.choice(string.digits + string.ascii_lowercase) for _ in range(32)), 'flowid': ''.join(random.choices(string.digits + string.ascii_lowercase, k=32)),
} }
return self._search_json(r'QZOutputJson=', self._download_webpage( return self._search_json(r'QZOutputJson=', self._download_webpage(

View file

@ -23,11 +23,12 @@ def _real_extract(self, url):
if len(matching_extractors) == 0: if len(matching_extractors) == 0:
raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True) raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True)
elif len(matching_extractors) > 1: elif len(matching_extractors) > 1:
try: # Check for exact match extractor = next(( # Check for exact match
extractor = next( ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()
ie for ie in matching_extractors ), None) or next(( # Check for exact match without plugin suffix
if ie.IE_NAME.lower() == extractor_id.lower()) ie for ie in matching_extractors if ie.IE_NAME.split('+')[0].lower() == extractor_id.lower()
except StopIteration: ), None)
if not extractor:
raise ExtractorError( raise ExtractorError(
'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors),
expected=True) expected=True)

226
yt_dlp/extractor/thisvid.py Normal file
View file

@ -0,0 +1,226 @@
import itertools
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
int_or_none,
url_or_none,
urljoin,
)
class ThisVidIE(InfoExtractor):
    """Extractor for single thisvid.com videos (``/videos/<slug>`` and ``/embed/<id>``).

    The media itself is handed to the Generic extractor; this class only adds
    title, uploader details and the age limit on top of that result.
    """

    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'description': 'md5:372353bb995883d1b65fddf507489acd',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'display_id': 'sitting-on-ball-tight-jeans',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/3533241/',
        'md5': '839becb572995687e11a69dc4358a386',
        'info_dict': {
            'id': '3533241',
            'ext': 'mp4',
            'title': 'Sitting on ball tight jeans',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
            'uploader_id': '150629',
            'uploader': 'jeanslevisjeans',
            'display_id': 'sitting-on-ball-tight-jeans',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for ``url`` handled by the Generic extractor.

        Calls ``raise_login_required`` when the page marks the video as private.
        """
        # Use the shared URL-matching helper rather than calling re.match directly
        main_id, type_ = self._match_valid_url(url).group('id', 'type')
        webpage = self._download_webpage(url, main_id)

        title = self._html_search_regex(
            r'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
            webpage, 'title')

        if type_ == 'embed':
            # look for more metadata
            video_alt_url = url_or_none(self._search_regex(
                rf'''video_alt_url\s*:\s+'({self._VALID_URL}/)',''',
                webpage, 'video_alt_url', default=None))
            if video_alt_url and video_alt_url != url:
                # Keep the embed page if the main page cannot be downloaded
                webpage = self._download_webpage(
                    video_alt_url, main_id,
                    note='Redirecting embed to main page', fatal=False) or webpage

        video_holder = get_element_by_class('video-holder', webpage) or ''
        if '>This video is a private video' in video_holder:
            # Only the first line of the cleaned notice is used as the message
            self.raise_login_required(
                (clean_html(video_holder) or 'Private video').partition('\n')[0])

        # The match is "<member id>/<rest of the anchor tag><display name>";
        # splitting on the end of the tag separates the id from the name.
        uploader = self._html_search_regex(
            r'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
            webpage, 'uploader', default='')
        uploader = re.split(r'''/["'][^>]*>\s*''', uploader)
        if len(uploader) == 2:
            # id must be non-empty, uploader could be ''
            uploader_id, uploader = uploader
            uploader = uploader or None
        else:
            uploader_id = uploader = None

        return self.url_result(
            url, ie='Generic', url_transparent=True,
            title=title,
            age_limit=18,
            uploader=uploader,
            uploader_id=uploader_id)
class ThisVidPlaylistBaseIE(InfoExtractor):
    """Shared paging logic for thisvid.com listing pages."""

    # Regex fragment for the hrefs to collect; None here, so presumably
    # every concrete subclass overrides it (TODO confirm).
    _PLAYLIST_URL_RE = None

    @classmethod
    def _find_urls(cls, html):
        """Yield the href of every ``<a>`` tag in ``html`` matching ``_PLAYLIST_URL_RE``."""
        for m in re.finditer(rf'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>{cls._PLAYLIST_URL_RE}\b)[^>]+>''', html):
            yield m.group('url')

    def _generate_playlist_entries(self, url, playlist_id, html=None):
        """Yield matching URLs from ``url`` and each following page.

        ``html``, when given, is used as the first page instead of downloading
        it. Iteration stops as soon as no next page can be determined.
        """
        page_url = url
        for page in itertools.count(1):
            if not html:
                # Non-fatal: a failed download becomes an empty page
                html = self._download_webpage(
                    page_url, playlist_id, note=f'Downloading page {page}',
                    fatal=False) or ''

            yield from self._find_urls(html)

            # '' (not None) when the page has no "pagination-next" element
            next_page = get_element_by_class('pagination-next', html) or ''
            if next_page:
                # member list page
                # NOTE(review): this relies on urljoin returning None when no
                # link is found, which triggers the fallback below - confirm
                next_page = urljoin(url, self._search_regex(
                    r'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
                    next_page, 'next page link', group='url', default=None))

            # in case a member page should have pagination-next with empty link, not just `else:`
            if next_page is None:
                # playlist page
                # Derive the next URL by incrementing a trailing page number in
                # the path; a path without one is treated as page 1
                parsed_url = urllib.parse.urlparse(page_url)
                base_path, _, num = parsed_url.path.rpartition('/')
                num = int_or_none(num)
                if num is None:
                    base_path, num = parsed_url.path.rstrip('/'), 1
                parsed_url = parsed_url._replace(path=f'{base_path}/{num + 1}')
                next_page = urllib.parse.urlunparse(parsed_url)
                if page_url == next_page:
                    next_page = None

            if not next_page:
                return
            # Clearing html forces a download on the next iteration
            page_url, html = next_page, None

    def _make_playlist_result(self, url):
        """Download ``url`` and return a playlist of ``ThisVidIE`` entries found from it."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Title comes from the og:title, else <title>, minus a trailing
        # "| ThisVid.com" suffix; an empty result becomes None
        title = re.split(
            r'(?i)\s*\|\s*ThisVid\.com\s*$',
            self._og_search_title(webpage, default=None)
            or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None

        # The page already downloaded is passed on so it is not fetched twice
        return self.playlist_from_matches(
            self._generate_playlist_entries(url, playlist_id, webpage),
            playlist_id=playlist_id, playlist_title=title, ie=ThisVidIE)
class ThisVidMemberIE(ThisVidPlaylistBaseIE):
    """Playlist extractor for ThisVid member pages."""

    _VALID_URL = r'https?://thisvid\.com/members/(?P<id>\d+)'
    # Entries on member pages are links to regular video pages
    _PLAYLIST_URL_RE = ThisVidIE._VALID_URL

    _TESTS = [{
        'url': 'https://thisvid.com/members/2140501/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Profile',
        },
        'playlist_mincount': 16,
    }, {
        'url': 'https://thisvid.com/members/2140501/favourite_videos/',
        'info_dict': {
            'id': '2140501',
            'title': 'Rafflesia\'s Favourite Videos',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://thisvid.com/members/636468/public_videos/',
        'info_dict': {
            'id': '636468',
            'title': 'Happymouth\'s Public Videos',
        },
        'playlist_mincount': 196,
    }]

    def _real_extract(self, url):
        """Return the member page at `url` as a playlist."""
        playlist = self._make_playlist_result(url)
        return playlist
class ThisVidPlaylistIE(ThisVidPlaylistBaseIE):
    """Extractor for ThisVid playlist URLs (playlist or single video via noplaylist)."""

    _VALID_URL = r'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
    _TESTS = [{
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '6615',
            'title': 'Underwear Stuff',
        },
        'playlist_mincount': 200,
    }, {
        'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
        'info_dict': {
            'id': '1072387',
            'ext': 'mp4',
            'title': 'Big Italian Booty 28',
            'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
            'uploader_id': '367912',
            'uploader': 'Jcmusclefun',
            'age_limit': 18,
            'display_id': 'big-italian-booty-28',
            'thumbnail': r're:https?://\w+\.thisvid\.com/(?:[^/]+/)+1072387/preview\.jpg',
        },
        'params': {
            'noplaylist': True,
        },
    }]
    # Playlist pages link to their entries using playlist-style URLs
    _PLAYLIST_URL_RE = _VALID_URL

    def _generate_playlist_entries(self, url, playlist_id, html=None):
        """Yield plain `/videos/<video_id>/` URLs for each playlist-style entry link."""
        for wrapped_url in super()._generate_playlist_entries(url, playlist_id, html):
            video_id = re.match(self._VALID_URL, wrapped_url).group('video_id')
            yield urljoin(url, f'/videos/{video_id}/')

    def _real_extract(self, url):
        """Return the playlist, or a redirect to the single video when no playlist is wanted."""
        playlist_id, video_id = self._match_valid_url(url).group('id', 'video_id')

        if not self._yes_playlist(playlist_id, video_id):
            redirect_url = urljoin(url, f'/videos/{video_id}/')
            return self.url_result(redirect_url, ThisVidIE)

        result = self._make_playlist_result(url)

        # Fix duplicated title (`the title - the title` => `the title`)
        # The title is optional (_make_playlist_result may find none), so do
        # not assume the key is present or that its value is a string.
        title = result.get('title') or ''
        t_len = len(title)
        if t_len > 5 and t_len % 2 != 0:
            # Odd length: the middle character is the candidate separator
            t_len = t_len // 2
            if title[t_len] == '-':
                first, second = map(str.strip, (title[:t_len], title[t_len + 1:]))
                if first and first == second:
                    result['title'] = first

        return result

View file

@ -11,6 +11,7 @@
HEADRequest, HEADRequest,
LazyList, LazyList,
UnsupportedError, UnsupportedError,
UserNotLive,
get_element_by_id, get_element_by_id,
get_first, get_first,
int_or_none, int_or_none,
@ -49,7 +50,7 @@ def _get_sigi_state(self, webpage, display_id):
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
if webpage_cookies.get('sid_tt'): if webpage_cookies.get('sid_tt'):
self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value) self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
@ -68,8 +69,8 @@ def _build_api_query(self, query, app_version, manifest_app_version):
'build_number': app_version, 'build_number': app_version,
'manifest_version_code': manifest_app_version, 'manifest_version_code': manifest_app_version,
'update_version_code': manifest_app_version, 'update_version_code': manifest_app_version,
'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)), 'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
'uuid': ''.join([random.choice(string.digits) for _ in range(16)]), 'uuid': ''.join(random.choices(string.digits, k=16)),
'_rticket': int(time.time() * 1000), '_rticket': int(time.time() * 1000),
'ts': int(time.time()), 'ts': int(time.time()),
'device_brand': 'Google', 'device_brand': 'Google',
@ -638,7 +639,7 @@ def _video_entries_api(self, webpage, user_id, username):
'max_cursor': 0, 'max_cursor': 0,
'min_cursor': 0, 'min_cursor': 0,
'retry_type': 'no_retry', 'retry_type': 'no_retry',
'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
} }
for page in itertools.count(1): for page in itertools.count(1):
@ -686,7 +687,7 @@ def _entries(self, list_id, display_id):
'cursor': 0, 'cursor': 0,
'count': 20, 'count': 20,
'type': 5, 'type': 5,
'device_id': ''.join(random.choice(string.digits) for i in range(19)) 'device_id': ''.join(random.choices(string.digits, k=19))
} }
for page in itertools.count(1): for page in itertools.count(1):
@ -980,3 +981,42 @@ def _real_extract(self, url):
if self.suitable(new_url): # Prevent infinite loop in case redirect fails if self.suitable(new_url): # Prevent infinite loop in case redirect fails
raise UnsupportedError(new_url) raise UnsupportedError(new_url)
return self.url_result(new_url) return self.url_result(new_url)
class TikTokLiveIE(InfoExtractor):
    """Extractor for TikTok user live streams (`/@user/live`)."""

    _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/live'
    IE_NAME = 'tiktok:live'

    _TESTS = [{
        'url': 'https://www.tiktok.com/@iris04201/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for a running live stream.

        Raises UserNotLive when no room id is found on the page or the API
        reports a status other than 2, and ExtractorError when the API
        response carries no status at all.
        """
        uploader = self._match_id(url)
        webpage = self._download_webpage(url, uploader, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})

        room_id = self._html_search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
        if not room_id:
            raise UserNotLive(video_id=uploader)

        api_response = self._download_json(
            'https://www.tiktok.com/api/live/detail/', room_id, query={
                'aid': '1988',
                'roomID': room_id,
            })
        live_info = traverse_obj(api_response, 'LiveRoomInfo', expected_type=dict, default={})

        if 'status' not in live_info:
            raise ExtractorError('Unexpected response from TikTok API')
        # status = 2 if live else 4
        if int_or_none(live_info['status']) != 2:
            raise UserNotLive(video_id=uploader)

        # Prefer the API title; fall back to the page's meta tags
        title = live_info.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage, default='')

        return {
            'id': room_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': traverse_obj(live_info, ('ownerInfo', 'id')),
            'creator': traverse_obj(live_info, ('ownerInfo', 'nickname')),
            'concurrent_view_count': traverse_obj(live_info, ('liveRoomStats', 'userCount'), expected_type=int),
            'formats': self._extract_m3u8_formats(live_info['liveUrl'], room_id, 'mp4', live=True),
            'is_live': True,
        }

View file

@ -0,0 +1,48 @@
from .common import InfoExtractor
from ..utils import ExtractorError, int_or_none, parse_iso8601, traverse_obj
class TrtCocukVideoIE(InfoExtractor):
    """Extractor for video pages on trtcocuk.net.tr."""

    _VALID_URL = r'https?://www\.trtcocuk\.net\.tr/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.trtcocuk.net.tr/video/kaptan-pengu-ve-arkadaslari-1',
        'info_dict': {
            'id': '3789738',
            'ext': 'mp4',
            'season_number': 1,
            'series': '"Kaptan Pengu ve Arkadaşları"',
            'season': 'Season 1',
            'title': 'Kaptan Pengu ve Arkadaşları 1 Bölüm İzle TRT Çocuk',
            'release_date': '20201209',
            'release_timestamp': 1607513774,
        }
    }, {
        'url': 'https://www.trtcocuk.net.tr/video/sef-rokanin-lezzet-dunyasi-17',
        'info_dict': {
            'id': '10260842',
            'ext': 'mp4',
            'series': '"Şef Roka\'nın Lezzet Dünyası"',
            'title': 'Şef Roka\'nın Lezzet Dünyası 17 Bölüm İzle TRT Çocuk',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's Nuxt data and its HLS manifest."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        nuxtjs_data = self._search_nuxt_data(webpage, display_id)['data']

        # The `video` field is tried as JSON first; if that fails to parse,
        # the raw value is used as the manifest URL.
        raw_video = nuxtjs_data['video']
        try:
            video_url = self._parse_json(raw_video, display_id)
        except ExtractorError:
            video_url = raw_video

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)

        return {
            'id': str(nuxtjs_data['id']),
            'formats': formats,
            'subtitles': subtitles,
            'season_number': int_or_none(nuxtjs_data.get('season')),
            'release_timestamp': parse_iso8601(nuxtjs_data.get('publishedDate')),
            'series': traverse_obj(nuxtjs_data, ('show', 0, 'title')),
            'title': self._html_extract_title(webpage)  # TODO: get better title
        }

View file

@ -38,7 +38,7 @@ class TwitCastingIE(InfoExtractor):
'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.', 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20110822', 'upload_date': '20110822',
'timestamp': 1314010824, 'timestamp': 1313978424,
'duration': 32, 'duration': 32,
'view_count': int, 'view_count': int,
}, },
@ -52,10 +52,10 @@ class TwitCastingIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Live playing something #3689740', 'title': 'Live playing something #3689740',
'uploader_id': 'mttbernardini', 'uploader_id': 'mttbernardini',
'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.', 'description': 'md5:1dc7efa2f1ab932fcd119265cebeec69',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20120212', 'upload_date': '20120211',
'timestamp': 1329028024, 'timestamp': 1328995624,
'duration': 681, 'duration': 681,
'view_count': int, 'view_count': int,
}, },
@ -64,15 +64,22 @@ class TwitCastingIE(InfoExtractor):
'videopassword': 'abc', 'videopassword': 'abc',
}, },
}, { }, {
'note': 'archive is split in 2 parts',
'url': 'https://twitcasting.tv/loft_heaven/movie/685979292', 'url': 'https://twitcasting.tv/loft_heaven/movie/685979292',
'info_dict': { 'info_dict': {
'id': '685979292', 'id': '685979292',
'ext': 'mp4', 'ext': 'mp4',
'title': '南波一海のhear_here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”', 'title': '【無料配信】南波一海のhear/here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”',
'duration': 6964.599334, 'uploader_id': 'loft_heaven',
'description': 'md5:3a0c7b53019df987ce545c935538bacf',
'upload_date': '20210604',
'timestamp': 1622802114,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 6964,
'view_count': int,
},
'params': {
'skip_download': True,
}, },
'playlist_mincount': 2,
}] }]
def _parse_data_movie_playlist(self, dmp, video_id): def _parse_data_movie_playlist(self, dmp, video_id):
@ -88,15 +95,18 @@ def _parse_data_movie_playlist(self, dmp, video_id):
def _real_extract(self, url): def _real_extract(self, url):
uploader_id, video_id = self._match_valid_url(url).groups() uploader_id, video_id = self._match_valid_url(url).groups()
webpage, urlh = self._download_webpage_handle(url, video_id)
video_password = self.get_param('videopassword') video_password = self.get_param('videopassword')
request_data = None request_data = None
if video_password: if video_password:
request_data = urlencode_postdata({ request_data = urlencode_postdata({
'password': video_password, 'password': video_password,
**self._hidden_inputs(webpage),
}, encoding='utf-8') }, encoding='utf-8')
webpage, urlh = self._download_webpage_handle( webpage, urlh = self._download_webpage_handle(
url, video_id, data=request_data, url, video_id, data=request_data,
headers={'Origin': 'https://twitcasting.tv'}) headers={'Origin': 'https://twitcasting.tv'},
note='Trying video password')
if urlh.geturl() != url and request_data: if urlh.geturl() != url and request_data:
webpage = self._download_webpage( webpage = self._download_webpage(
urlh.geturl(), video_id, data=request_data, urlh.geturl(), video_id, data=request_data,
@ -122,7 +132,7 @@ def _real_extract(self, url):
duration = (try_get(video_js_data, lambda x: sum(float_or_none(y.get('duration')) for y in x) / 1000) duration = (try_get(video_js_data, lambda x: sum(float_or_none(y.get('duration')) for y in x) / 1000)
or parse_duration(clean_html(get_element_by_class('tw-player-duration-time', webpage)))) or parse_duration(clean_html(get_element_by_class('tw-player-duration-time', webpage))))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
(r'Total\s*:\s*([\d,]+)\s*Views', r'総視聴者\s*:\s*([\d,]+)\s*</'), webpage, 'views', None)) (r'Total\s*:\s*Views\s*([\d,]+)', r'総視聴者\s*:\s*([\d,]+)\s*</'), webpage, 'views', None))
timestamp = unified_timestamp(self._search_regex( timestamp = unified_timestamp(self._search_regex(
r'data-toggle="true"[^>]+datetime="([^"]+)"', r'data-toggle="true"[^>]+datetime="([^"]+)"',
webpage, 'datetime', None)) webpage, 'datetime', None))

View file

@ -11,8 +11,10 @@
int_or_none, int_or_none,
js_to_json, js_to_json,
sanitized_Request, sanitized_Request,
smuggle_url,
try_get, try_get,
unescapeHTML, unescapeHTML,
unsmuggle_url,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -106,7 +108,7 @@ def _download_lecture(self, course_id, lecture_id):
% (course_id, lecture_id), % (course_id, lecture_id),
lecture_id, 'Downloading lecture JSON', query={ lecture_id, 'Downloading lecture JSON', query={
'fields[lecture]': 'title,description,view_html,asset', 'fields[lecture]': 'title,description,view_html,asset',
'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed',
}) })
def _handle_error(self, response): def _handle_error(self, response):
@ -199,9 +201,11 @@ def is_logged(webpage):
def _real_extract(self, url): def _real_extract(self, url):
lecture_id = self._match_id(url) lecture_id = self._match_id(url)
course_id = unsmuggle_url(url, {})[1].get('course_id')
webpage = None
if not course_id:
webpage = self._download_webpage(url, lecture_id) webpage = self._download_webpage(url, lecture_id)
course_id, _ = self._extract_course_info(webpage, lecture_id) course_id, _ = self._extract_course_info(webpage, lecture_id)
try: try:
@ -209,6 +213,7 @@ def _real_extract(self, url):
except ExtractorError as e: except ExtractorError as e:
# Error could possibly mean we are not enrolled in the course # Error could possibly mean we are not enrolled in the course
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
webpage = webpage or self._download_webpage(url, lecture_id)
self._enroll_course(url, webpage, course_id) self._enroll_course(url, webpage, course_id)
lecture = self._download_lecture(course_id, lecture_id) lecture = self._download_lecture(course_id, lecture_id)
else: else:
@ -391,6 +396,9 @@ def extract_subtitles(track_list):
if f.get('url'): if f.get('url'):
formats.append(f) formats.append(f)
if not formats and asset.get('course_is_drmed'):
self.report_drm(video_id)
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@ -449,7 +457,9 @@ def _real_extract(self, url):
if lecture_id: if lecture_id:
entry = { entry = {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']), 'url': smuggle_url(
f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}',
{'course_id': course_id}),
'title': entry.get('title'), 'title': entry.get('title'),
'ie_key': UdemyIE.ie_key(), 'ie_key': UdemyIE.ie_key(),
} }

View file

@ -14,12 +14,13 @@ class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
'md5': 'ff5b0c89928f8083c74bbd5099c9292d', 'md5': '5ba36643c77cc3d34ffeadad89937d1e',
'info_dict': { 'info_dict': {
'id': '203704', 'id': '203704',
'ext': 'mp4', 'ext': 'mp4',
'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1513292400, 'timestamp': 1513292400,
'upload_date': '20171214', 'upload_date': '20171214',
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik', 'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
@ -29,6 +30,24 @@ class URPlayIE(InfoExtractor):
'episode': 'Om vetenskap, kritiskt tänkande och motstånd', 'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
'age_limit': 15, 'age_limit': 15,
}, },
}, {
'url': 'https://urplay.se/program/222967-en-foralders-dagbok-mitt-barn-skadar-sig-sjalv',
'info_dict': {
'id': '222967',
'ext': 'mp4',
'title': 'En förälders dagbok : Mitt barn skadar sig själv',
'description': 'md5:9f771eef03a732a213b367b52fe826ca',
'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1629676800,
'upload_date': '20210823',
'series': 'En förälders dagbok',
'duration': 1740,
'age_limit': 15,
'episode_number': 3,
'categories': 'count:2',
'tags': 'count:7',
'episode': 'Mitt barn skadar sig själv',
},
}, { }, {
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
'info_dict': { 'info_dict': {
@ -36,12 +55,17 @@ class URPlayIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde', 'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1440086400, 'timestamp': 1440086400,
'upload_date': '20150820', 'upload_date': '20150820',
'series': 'Tripp, Trapp, Träd', 'series': 'Tripp, Trapp, Träd',
'duration': 865, 'duration': 865,
'age_limit': 1,
'episode_number': 1,
'categories': [],
'tags': ['Sova'], 'tags': ['Sova'],
'episode': 'Sovkudde', 'episode': 'Sovkudde',
'season': 'Säsong 1',
}, },
}, { }, {
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
@ -69,7 +93,7 @@ def _real_extract(self, url):
urplayer_streams = urplayer_data.get('streamingInfo', {}) urplayer_streams = urplayer_data.get('streamingInfo', {})
for k, v in urplayer_streams.get('raw', {}).items(): for k, v in urplayer_streams.get('raw', {}).items():
if not (k in ('sd', 'hd') and isinstance(v, dict)): if not (k in ('sd', 'hd', 'mp3', 'm4a') and isinstance(v, dict)):
continue continue
file_http = v.get('location') file_http = v.get('location')
if file_http: if file_http:

View file

@ -119,7 +119,7 @@ def _real_extract(self, url):
result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
query = parse_qs(player_url) query = parse_qs(player_url)
random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) random_seed = ''.join(random.choices(string.ascii_letters + string.digits, k=8))
query['_s'] = random_seed query['_s'] = random_seed
query['_t'] = result[:16] query['_t'] = result[:16]

View file

@ -0,0 +1,336 @@
import base64
import functools
import math
import re
import time
import urllib.parse
from .common import InfoExtractor
from .slideslive import SlidesLiveIE
from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
traverse_obj,
update_url_query,
url_or_none,
)
class VideoKenBaseIE(InfoExtractor):
    """Shared helpers for the VideoKen-backed video portals."""

    # Portal hostname -> organization slug used by the VideoKen API
    _ORGANIZATIONS = {
        'videos.icts.res.in': 'icts',
        'videos.cncf.io': 'cncf',
        'videos.neurips.cc': 'neurips',
    }
    _BASE_URL_RE = rf'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'
    _PAGE_SIZE = 12

    def _get_org_id_and_api_key(self, org, video_id):
        """Return the `(id, apikey)` pair from the organization details endpoint."""
        details = self._download_json(
            f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
            note='Downloading organization ID and API key', headers={
                'Accept': 'application/json',
            })
        return details['id'], details['apikey']

    def _create_slideslive_url(self, video_url, video_id, referer):
        """Return a SlidesLive embed URL, optionally tagged with the referring page.

        When `video_url` is missing or is a sign-in embed, the URL is rebuilt
        from `video_id` (with any `slideslive-` prefix removed). Returns None
        when neither `video_url` nor `video_id` is given.
        """
        if not video_url and not video_id:
            return
        elif not video_url or 'embed/sign-in' in video_url:
            # Remove the literal prefix only. str.lstrip() would strip any
            # leading run of the characters in "slideslive-", which eats
            # into IDs that happen to start with one of those characters.
            prefix = 'slideslive-'
            slideslive_id = video_id[len(prefix):] if video_id.startswith(prefix) else video_id
            video_url = f'https://slideslive.com/embed/{slideslive_id}'
        if url_or_none(referer):
            return update_url_query(video_url, {
                'embed_parent_url': referer,
                'embed_container_origin': f'https://{urllib.parse.urlparse(referer).netloc}',
            })
        return video_url

    def _extract_videos(self, videos, url):
        """Yield a url_result for each usable item under `videos`/`results`.

        Items without a video id or without a resolvable URL are skipped.
        """
        for video in traverse_obj(videos, (('videos', 'results'), ...)):
            video_id = traverse_obj(video, 'youtube_id', 'videoid')
            if not video_id:
                continue
            ie_key = None
            if traverse_obj(video, 'type', 'source') == 'youtube':
                # The bare YouTube id is passed on as the URL
                video_url = video_id
                ie_key = 'Youtube'
            else:
                video_url = traverse_obj(video, 'embed_url', 'embeddableurl')
                if urllib.parse.urlparse(video_url).netloc == 'slideslive.com':
                    ie_key = SlidesLiveIE
                    video_url = self._create_slideslive_url(video_url, video_id, url)
            if not video_url:
                continue
            yield self.url_result(video_url, ie_key, video_id)
class VideoKenIE(VideoKenBaseIE):
    """Extractor for single video pages on VideoKen-backed portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
    _TESTS = [{
        # neurips -> videoken -> slideslive
        'url': 'https://videos.neurips.cc/video/slideslive-38922815',
        'info_dict': {
            'id': '38922815',
            'ext': 'mp4',
            'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
            'timestamp': 1630939331,
            'upload_date': '20210906',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:330',
            'chapters': 'count:329',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to download VideoKen API JSON'],
    }, {
        # neurips -> videoken -> slideslive -> youtube
        'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
        'info_dict': {
            'id': '2Xa_dt78rJE',
            'ext': 'mp4',
            'display_id': '38923348',
            'title': 'Machine Education',
            'description': 'Watch full version of this video at https://slideslive.com/38923348.',
            'channel': 'SlidesLive Videos - G2',
            'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
            'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
            'uploader': 'SlidesLive Videos - G2',
            'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
            'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
            'duration': 2504,
            'timestamp': 1618922125,
            'upload_date': '20200131',
            'age_limit': 0,
            'channel_follower_count': int,
            'view_count': int,
            'availability': 'unlisted',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'categories': ['People & Blogs'],
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:78',
            'chapters': 'count:77',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to download VideoKen API JSON'],
    }, {
        # icts -> videoken -> youtube
        'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
        'info_dict': {
            'id': 'zysIsojYdvc',
            'ext': 'mp4',
            'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
            'description': 'md5:87433069d79719eeadc1962cc2ace00b',
            'channel': 'International Centre for Theoretical Sciences',
            'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
            'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
            'uploader': 'International Centre for Theoretical Sciences',
            'uploader_id': 'ICTStalks',
            'uploader_url': 'http://www.youtube.com/user/ICTStalks',
            'duration': 3372,
            'upload_date': '20191004',
            'age_limit': 0,
            'live_status': 'not_live',
            'availability': 'public',
            'playable_in_embed': True,
            'channel_follower_count': int,
            'like_count': int,
            'view_count': int,
            'categories': ['Science & Technology'],
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:42',
            'chapters': 'count:20',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
        'only_matching': True,
    }, {
        'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
        'only_matching': True,
    }, {
        'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a VideoKen video page to the underlying hosted video.

        The video info API is queried non-fatally. When it yields nothing,
        the id itself is used: `slideslive-` ids go to SlidesLive and
        11-character ids are handed to the YouTube extractor. Raises
        ExtractorError when none of these apply.
        """
        hostname, video_id = self._match_valid_url(url).group('host', 'id')
        org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
        details = self._download_json(
            'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
                'videoid': video_id,
                'org_id': org_id,
            }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
            errnote='Failed to download VideoKen API JSON', fatal=False)
        if details:
            return next(self._extract_videos({'videos': [details]}, url))
        # fallback for API error 400 response
        elif video_id.startswith('slideslive-'):
            return self.url_result(
                self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
        elif re.match(r'^[\w-]{11}$', video_id):
            # The result must be returned; without `return` this branch
            # silently produced None.
            return self.url_result(video_id, 'Youtube', video_id)
        else:
            raise ExtractorError('Unable to extract without VideoKen API response')
class VideoKenPlayerIE(VideoKenBaseIE):
    """Extractor for `player.videoken.com` SlidesLive embeds."""

    _VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://player.videoken.com/embed/slideslive-38968434',
        'info_dict': {
            'id': '38968434',
            'ext': 'mp4',
            'title': 'Deep Learning with Label Differential Privacy',
            'timestamp': 1643377020,
            'upload_date': '20220128',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:30',
            'chapters': 'count:29',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Hand the embedded video over to the SlidesLive extractor."""
        video_id = self._match_id(url)
        # No direct video URL is known here, so it is built from the id
        slideslive_url = self._create_slideslive_url(None, video_id, url)
        return self.url_result(slideslive_url, SlidesLiveIE, video_id)
class VideoKenPlaylistIE(VideoKenBaseIE):
    """Extractor for playlists on VideoKen-backed portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://videos.icts.res.in/category/1822/playlist/381',
        'playlist_mincount': 117,
        'info_dict': {
            'id': '381',
            'title': 'Cosmology - The Next Decade',
        },
    }]

    def _real_extract(self, url):
        """Fetch the playlist items from the API and return them as a playlist."""
        mobj = self._match_valid_url(url)
        hostname, playlist_id = mobj.group('host', 'id')
        organization = self._ORGANIZATIONS[hostname]
        org_id, _ = self._get_org_id_and_api_key(organization, playlist_id)

        videos = self._download_json(
            f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
            playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON')

        entries = self._extract_videos(videos, url)
        return self.playlist_result(entries, playlist_id, videos.get('title'))
class VideoKenCategoryIE(VideoKenBaseIE):
    """Extractor for category listings on VideoKen-backed portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://videos.icts.res.in/category/1822/',
        'playlist_mincount': 500,
        'info_dict': {
            'id': '1822',
            'title': 'Programs',
        },
    }, {
        'url': 'https://videos.neurips.cc/category/350/',
        'playlist_mincount': 34,
        'info_dict': {
            'id': '350',
            'title': 'NeurIPS 2018',
        },
    }, {
        'url': 'https://videos.cncf.io/category/479/',
        'playlist_mincount': 328,
        'info_dict': {
            'id': '479',
            'title': 'KubeCon + CloudNativeCon Europe\'19',
        },
    }]

    def _get_category_page(self, category_id, org_id, page=1, note=None):
        """Download one page of category videos; returns `{}` if the request fails."""
        query = {
            'category_id': category_id,
            'page_number': page,
            'length': self._PAGE_SIZE,
        }
        response = self._download_json(
            f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id,
            fatal=False, note=note or f'Downloading category page {page}',
            query=query, headers={'Accept': 'application/json'})
        return response or {}

    def _entries(self, category_id, org_id, url, page):
        """Yield the entries of the given page index (the API page number is `page + 1`)."""
        page_data = self._get_category_page(category_id, org_id, page + 1)
        yield from self._extract_videos(page_data, url)

    def _real_extract(self, url):
        """Return the category as a paged playlist sized from the first API response."""
        hostname, category_id = self._match_valid_url(url).group('host', 'id')
        org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id)

        category_info = self._get_category_page(category_id, org_id, note='Downloading category info')
        category = category_info['category_name']
        record_count = int(category_info['recordsTotal'])
        total_pages = math.ceil(record_count / self._PAGE_SIZE)

        page_func = functools.partial(self._entries, category_id, org_id, url)
        entries = InAdvancePagedList(page_func, total_pages, self._PAGE_SIZE)
        return self.playlist_result(entries, category_id, category)
class VideoKenTopicIE(VideoKenBaseIE):
    """Extractor for topic listings on VideoKen-backed portals."""

    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://videos.neurips.cc/topic/machine%20learning/',
        'playlist_mincount': 500,
        'info_dict': {
            'id': 'machine_learning',
            'title': 'machine learning',
        },
    }, {
        'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
        'playlist_mincount': 77,
        'info_dict': {
            'id': 'gravitational_waves',
            'title': 'gravitational waves'
        },
    }, {
        'url': 'https://videos.cncf.io/topic/prometheus/',
        'playlist_mincount': 134,
        'info_dict': {
            'id': 'prometheus',
            'title': 'prometheus',
        },
    }]

    def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None):
        """Download one page of topic search results; returns `{}` if the request fails."""
        query = {
            'orgid': org_id,
            'size': self._PAGE_SIZE,
            'query': topic,
            'page': page,
            'sort': 'upload_desc',
            'filter': 'all',
            'token': api_key,
            'is_topic': 'true',
            'category': '',
            'searchid': search_id,
        }
        response = self._download_json(
            'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False, query=query,
            headers={'Accept': 'application/json'},
            note=note or f'Downloading topic page {page}')
        return response or {}

    def _entries(self, topic, org_id, search_id, api_key, url, page):
        """Yield the entries of the given page index (the API page number is `page + 1`)."""
        page_data = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
        yield from self._extract_videos(page_data, url)

    def _real_extract(self, url):
        """Return the topic as a paged playlist sized from the first API response."""
        hostname, topic_id = self._match_valid_url(url).group('host', 'id')
        # The URL component is percent-encoded: the decoded form is the
        # query and title, and its underscore form is the playlist id.
        topic = urllib.parse.unquote(topic_id)
        topic_id = topic.replace(' ', '_')

        org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic)
        raw_search_id = f':{topic}:{int(time.time())}:transient'
        search_id = base64.b64encode(raw_search_id.encode()).decode()

        topic_info = self._get_topic_page(
            topic, org_id, search_id, api_key, note='Downloading topic info')
        total_pages = int_or_none(topic_info['total_no_of_pages'])

        page_func = functools.partial(self._entries, topic, org_id, search_id, api_key, url)
        entries = InAdvancePagedList(page_func, total_pages, self._PAGE_SIZE)
        return self.playlist_result(entries, topic_id, topic)

View file

@ -251,7 +251,7 @@ def _login(self, country_code, video_id):
return self._user_token return self._user_token
def _get_token(self, country_code, video_id): def _get_token(self, country_code, video_id):
rand = ''.join(random.choice('0123456789') for _ in range(10)) rand = ''.join(random.choices('0123456789', k=10))
return self._download_json( return self._download_json(
f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id, f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id,
headers={'Content-Type': 'application/json'}, note='Getting bearer token', headers={'Content-Type': 'application/json'}, note='Getting bearer token',

View file

@ -0,0 +1,40 @@
from .common import InfoExtractor
class VolejTVIE(InfoExtractor):
    """Extractor for single videos hosted on volej.tv."""
    _VALID_URL = r'https?://volej\.tv/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://volej.tv/video/725742/',
        'info_dict': {
            'id': '725742',
            'ext': 'mp4',
            'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV',
            'thumbnail': 'https://volej.tv/images/og/16/17186/og.png',
            'title': 'VK Královo Pole vs VK Prostějov',
        },
    }, {
        'url': 'https://volej.tv/video/725605/',
        'info_dict': {
            'id': '725605',
            'ext': 'mp4',
            'thumbnail': 'https://volej.tv/images/og/15/17185/og.png',
            'title': 'VK Lvi Praha vs VK Euro Sitex Příbram',
            'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The stream description is a JSON object assigned inside a CDATA section
        player_data = self._search_json(
            r'<\s*!\[CDATA[^=]+=', webpage, 'CDATA', video_id)
        hls_url = player_data['urls']['hls']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id)

        # Remaining metadata comes from the page's <meta> tags
        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
        description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'formats': formats,
            'subtitles': subtitles,
        }

View file

@ -30,7 +30,7 @@ def _call_api(self, path, video_id, note, data=None):
base_url = self._API_DOMAIN + '/core/' + path base_url = self._API_DOMAIN + '/core/' + path
query = [ query = [
('oauth_consumer_key', self._API_PARAMS['oAuthKey']), ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))),
('oauth_signature_method', 'HMAC-SHA1'), ('oauth_signature_method', 'HMAC-SHA1'),
('oauth_timestamp', int(time.time())), ('oauth_timestamp', int(time.time())),
] ]

View file

@ -6,12 +6,15 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest,
determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_qs, parse_qs,
traverse_obj, traverse_obj,
try_get, try_get,
update_url_query, update_url_query,
urlhandle_detect_ext,
) )
@ -34,6 +37,16 @@ def _download_embed_config(self, config_type, config_id, referer):
return embed_config return embed_config
def _get_real_ext(self, url):
ext = determine_ext(url, default_ext='bin')
if ext == 'bin':
urlh = self._request_webpage(
HEADRequest(url), None, note='Checking media extension',
errnote='HEAD request returned error', fatal=False)
if urlh:
ext = urlhandle_detect_ext(urlh, default='bin')
return 'mp4' if ext == 'mov' else ext
def _extract_media(self, embed_config): def _extract_media(self, embed_config):
data = embed_config['media'] data = embed_config['media']
video_id = data['hashedId'] video_id = data['hashedId']
@ -51,13 +64,13 @@ def _extract_media(self, embed_config):
continue continue
elif atype in ('still', 'still_image'): elif atype in ('still', 'still_image'):
thumbnails.append({ thumbnails.append({
'url': aurl, 'url': aurl.replace('.bin', f'.{self._get_real_ext(aurl)}'),
'width': int_or_none(a.get('width')), 'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')), 'height': int_or_none(a.get('height')),
'filesize': int_or_none(a.get('size')), 'filesize': int_or_none(a.get('size')),
}) })
else: else:
aext = a.get('ext') aext = a.get('ext') or self._get_real_ext(aurl)
display_name = a.get('display_name') display_name = a.get('display_name')
format_id = atype format_id = atype
if atype and atype.endswith('_video') and display_name: if atype and atype.endswith('_video') and display_name:
@ -169,26 +182,26 @@ class WistiaIE(WistiaBaseIE):
'md5': '10c1ce9c4dde638202513ed17a3767bd', 'md5': '10c1ce9c4dde638202513ed17a3767bd',
'info_dict': { 'info_dict': {
'id': 'a6ndpko1wg', 'id': 'a6ndpko1wg',
'ext': 'bin', 'ext': 'mp4',
'title': 'Episode 2: Boxed Water\'s retention is thirsty', 'title': 'Episode 2: Boxed Water\'s retention is thirsty',
'upload_date': '20210324', 'upload_date': '20210324',
'description': 'md5:da5994c2c2d254833b412469d9666b7a', 'description': 'md5:da5994c2c2d254833b412469d9666b7a',
'duration': 966.0, 'duration': 966.0,
'timestamp': 1616614369, 'timestamp': 1616614369,
'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.bin', 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.png',
} }
}, { }, {
'url': 'wistia:5vd7p4bct5', 'url': 'wistia:5vd7p4bct5',
'md5': 'b9676d24bf30945d97060638fbfe77f0', 'md5': 'b9676d24bf30945d97060638fbfe77f0',
'info_dict': { 'info_dict': {
'id': '5vd7p4bct5', 'id': '5vd7p4bct5',
'ext': 'bin', 'ext': 'mp4',
'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679', 'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679',
'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f', 'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f',
'upload_date': '20220915', 'upload_date': '20220915',
'timestamp': 1663258727, 'timestamp': 1663258727,
'duration': 623.019, 'duration': 623.019,
'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.(?:jpg|bin)$', 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.jpg$',
}, },
}, { }, {
'url': 'wistia:sh7fpupwlt', 'url': 'wistia:sh7fpupwlt',
@ -208,25 +221,25 @@ class WistiaIE(WistiaBaseIE):
'url': 'https://www.weidert.com/blog/wistia-channels-video-marketing-tool', 'url': 'https://www.weidert.com/blog/wistia-channels-video-marketing-tool',
'info_dict': { 'info_dict': {
'id': 'cqwukac3z1', 'id': 'cqwukac3z1',
'ext': 'bin', 'ext': 'mp4',
'title': 'How Wistia Channels Can Help Capture Inbound Value From Your Video Content', 'title': 'How Wistia Channels Can Help Capture Inbound Value From Your Video Content',
'duration': 158.125, 'duration': 158.125,
'timestamp': 1618974400, 'timestamp': 1618974400,
'description': 'md5:27abc99a758573560be72600ef95cece', 'description': 'md5:27abc99a758573560be72600ef95cece',
'upload_date': '20210421', 'upload_date': '20210421',
'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.bin', 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.jpg',
} }
}, { }, {
'url': 'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', 'url': 'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
'md5': 'b9676d24bf30945d97060638fbfe77f0', 'md5': 'b9676d24bf30945d97060638fbfe77f0',
'info_dict': { 'info_dict': {
'id': '5vd7p4bct5', 'id': '5vd7p4bct5',
'ext': 'bin', 'ext': 'mp4',
'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
'upload_date': '20220915', 'upload_date': '20220915',
'timestamp': 1663258727, 'timestamp': 1663258727,
'duration': 623.019, 'duration': 623.019,
'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.bin', 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.jpg',
'description': 'a Paywall Videos video', 'description': 'a Paywall Videos video',
}, },
}] }]
@ -302,9 +315,9 @@ class WistiaChannelIE(WistiaBaseIE):
'url': 'https://fast.wistia.net/embed/channel/3802iirk0l?wchannelid=3802iirk0l&wmediaid=sp5dqjzw3n', 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l?wchannelid=3802iirk0l&wmediaid=sp5dqjzw3n',
'info_dict': { 'info_dict': {
'id': 'sp5dqjzw3n', 'id': 'sp5dqjzw3n',
'ext': 'bin', 'ext': 'mp4',
'title': 'The Roof S2: The Modern CRO', 'title': 'The Roof S2: The Modern CRO',
'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.bin', 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.png',
'duration': 86.487, 'duration': 86.487,
'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n', 'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n',
'timestamp': 1619790290, 'timestamp': 1619790290,
@ -334,12 +347,12 @@ class WistiaChannelIE(WistiaBaseIE):
'info_dict': { 'info_dict': {
'id': 'pz0m0l0if3', 'id': 'pz0m0l0if3',
'title': 'A Framework for Improving Product Team Performance', 'title': 'A Framework for Improving Product Team Performance',
'ext': 'bin', 'ext': 'mp4',
'timestamp': 1653935275, 'timestamp': 1653935275,
'upload_date': '20220530', 'upload_date': '20220530',
'description': 'Learn how to help your company improve and achieve your product related goals.', 'description': 'Learn how to help your company improve and achieve your product related goals.',
'duration': 1854.39, 'duration': 1854.39,
'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.bin', 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.png',
}, },
'params': {'noplaylist': True, 'skip_download': True}, 'params': {'noplaylist': True, 'skip_download': True},
}] }]

View file

@ -0,0 +1,51 @@
import re
from .common import InfoExtractor
from ..utils import int_or_none
class XanimuIE(InfoExtractor):
    """Extractor for video pages on xanimu.com; the URL slug is used as the id."""
    _VALID_URL = r'https?://(?:www\.)?xanimu\.com/(?P<id>[^/]+)/?'
    _TESTS = [{
        'url': 'https://xanimu.com/51944-the-princess-the-frog-hentai/',
        'md5': '899b88091d753d92dad4cb63bbf357a7',
        'info_dict': {
            'id': '51944-the-princess-the-frog-hentai',
            'ext': 'mp4',
            'title': 'The Princess + The Frog Hentai',
            'thumbnail': 'https://xanimu.com/storage/2020/09/the-princess-and-the-frog-hentai.jpg',
            'description': r're:^Enjoy The Princess \+ The Frog Hentai',
            'duration': 207.0,
            'age_limit': 18,
        },
    }, {
        'url': 'https://xanimu.com/huge-expansion/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Each quality is looked up as a quoted string assigned to a JS variable
        formats = []
        for variable in ('videoHigh', 'videoLow'):
            media_url = self._search_json(
                r'var\s+%s\s*=' % re.escape(variable), webpage, variable,
                video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]')
            if not media_url:
                continue
            formats.append({
                'url': media_url,
                'format_id': variable,
                # Rank the low-quality variant below the default
                'quality': -2 if variable.endswith('Low') else None,
            })

        # Prefer the "headline" value found in the page; fall back to <title>
        headline = self._search_regex(
            r'[\'"]headline[\'"]:\s*[\'"]([^"]+)[\'"]', webpage, 'title', default=None)
        title = headline or self._html_extract_title(webpage)
        thumbnail = self._html_search_meta('thumbnailUrl', webpage, default=None)
        description = self._html_search_meta('description', webpage, default=None)
        raw_duration = self._search_regex(
            r'duration:\s*[\'"]([^\'"]+?)[\'"]', webpage, 'duration', fatal=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': int_or_none(raw_duration),
            'age_limit': 18,
        }

View file

@ -270,9 +270,9 @@ def _real_extract(self, url):
for s_url in stream_urls: for s_url in stream_urls:
ext = determine_ext(s_url) ext = determine_ext(s_url)
if ext == 'mpd': if ext == 'mpd':
formats.extend(self._extract_mpd_formats(s_url, id, mpd_id='dash')) formats.extend(self._extract_mpd_formats(s_url, video_id, mpd_id='dash'))
elif ext == 'm3u8': elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(s_url, id, 'mp4')) formats.extend(self._extract_m3u8_formats(s_url, video_id, 'mp4'))
return { return {
'id': video_id, 'id': video_id,
'title': video_json.get('title') or self._og_search_title(webpage), 'title': video_json.get('title') or self._og_search_title(webpage),

View file

@ -96,31 +96,41 @@ class YoukuIE(InfoExtractor):
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader': '明月庄主moon', 'uploader': '明月庄主moon',
'uploader_id': '38465621', 'uploader_id': '38465621',
'uploader_url': 'http://i.youku.com/u/UMTUzODYyNDg0', 'uploader_url': 'https://www.youku.com/profile/index/?uid=UMTUzODYyNDg0',
'tags': list, 'tags': list,
}, },
}, { }, {
'url': 'http://video.tudou.com/v/XMjIyNzAzMTQ4NA==.html?f=46177805', 'url': 'https://v.youku.com/v_show/id_XNTA2NTA0MjA1Mg==.html',
'info_dict': { 'info_dict': {
'id': 'XMjIyNzAzMTQ4NA', 'id': 'XNTA2NTA0MjA1Mg',
'ext': 'mp4', 'ext': 'mp4',
'title': '卡马乔国足开大脚长传冲吊集锦', 'title': 'Minecraft我的世界建造超大巨型航空飞机菜鸟vs高手vs黑客',
'duration': 289, 'duration': 542.13,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader': '阿卜杜拉之星', 'uploader': '波哥游戏解说',
'uploader_id': '2382249', 'uploader_id': '156688084',
'uploader_url': 'http://i.youku.com/u/UOTUyODk5Ng==', 'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjI2NzUyMzM2',
'tags': list, 'tags': list,
}, },
}, { }, {
'url': 'http://video.tudou.com/v/XMjE4ODI3OTg2MA==.html', 'url': 'https://v.youku.com/v_show/id_XNTE1MzczOTg4MA==.html',
'only_matching': True, 'info_dict': {
'id': 'XNTE1MzczOTg4MA',
'ext': 'mp4',
'title': '国产超A特工片',
'duration': 362.97,
'thumbnail': r're:^https?://.*',
'uploader': '陈晓娟说历史',
'uploader_id': '1640913339',
'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjU2MzY1MzM1Ng==',
'tags': list,
},
}] }]
@staticmethod @staticmethod
def get_ysuid(): def get_ysuid():
return '%d%s' % (int(time.time()), ''.join([ return '%d%s' % (int(time.time()), ''.join(
random.choice(string.ascii_letters) for i in range(3)])) random.choices(string.ascii_letters, k=3)))
def get_format_name(self, fm): def get_format_name(self, fm):
_dict = { _dict = {
@ -151,7 +161,7 @@ def _real_extract(self, url):
# request basic data # request basic data
basic_data_params = { basic_data_params = {
'vid': video_id, 'vid': video_id,
'ccode': '0532', 'ccode': '0524',
'client_ip': '192.168.1.1', 'client_ip': '192.168.1.1',
'utid': cna, 'utid': cna,
'client_ts': time.time() / 1000, 'client_ts': time.time() / 1000,

View file

@ -292,7 +292,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors""" """Provide base functions for Youtube extractors"""
_RESERVED_NAMES = ( _RESERVED_NAMES = (
r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|' r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|' r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
r'browse|oembed|get_video_info|iframe_api|s/player|source|' r'browse|oembed|get_video_info|iframe_api|s/player|source|'
r'storefront|oops|index|account|t/terms|about|upload|signin|logout') r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
@ -2544,6 +2544,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': [], 'tags': [],
}, },
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'}, 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
'note': 'Audio formats with Dynamic Range Compression',
'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
'info_dict': {
'id': 'Tq92D6wQ1mg',
'ext': 'weba',
'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
'channel_follower_count': int,
'description': 'md5:17eccca93a786d51bc67646756894066',
'upload_date': '20191228',
'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
'playable_in_embed': True,
'like_count': int,
'categories': ['Entertainment'],
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
'age_limit': 18,
'channel': 'Projekt Melody',
'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
'view_count': int,
'availability': 'needs_auth',
'comment_count': int,
'live_status': 'not_live',
'uploader': 'Projekt Melody',
'duration': 106,
},
'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
} }
] ]
@ -2621,18 +2650,19 @@ def mpd_feed(format_id, delay):
""" """
@returns (manifest_url, manifest_stream_number, is_live) or None @returns (manifest_url, manifest_stream_number, is_live) or None
""" """
for retry in self.RetryManager(fatal=False):
with lock: with lock:
refetch_manifest(format_id, delay) refetch_manifest(format_id, delay)
f = next((f for f in formats if f['format_id'] == format_id), None) f = next((f for f in formats if f['format_id'] == format_id), None)
if not f: if not f:
if not is_live: if not is_live:
self.to_screen(f'{video_id}: Video is no longer live') retry.error = f'{video_id}: Video is no longer live'
else: else:
self.report_warning( retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}') continue
return None
return f['manifest_url'], f['manifest_stream_number'], is_live return f['manifest_url'], f['manifest_stream_number'], is_live
return None
for f in formats: for f in formats:
f['is_live'] = is_live f['is_live'] = is_live
@ -3553,7 +3583,7 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
itag = str_or_none(fmt.get('itag')) itag = str_or_none(fmt.get('itag'))
audio_track = fmt.get('audioTrack') or {} audio_track = fmt.get('audioTrack') or {}
stream_id = '%s.%s' % (itag or '', audio_track.get('id', '')) stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
if stream_id in stream_ids: if stream_id in stream_ids:
continue continue
@ -3634,11 +3664,12 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
dct = { dct = {
'asr': int_or_none(fmt.get('audioSampleRate')), 'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')), 'filesize': int_or_none(fmt.get('contentLength')),
'format_id': itag, 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
'format_note': join_nonempty( 'format_note': join_nonempty(
'%s%s' % (audio_track.get('displayName') or '', '%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''), ' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
'DRC' if fmt.get('isDrc') else None,
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
@ -3647,13 +3678,13 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
'fps': int_or_none(fmt.get('fps')) or None, 'fps': int_or_none(fmt.get('fps')) or None,
'audio_channels': fmt.get('audioChannels'), 'audio_channels': fmt.get('audioChannels'),
'height': height, 'height': height,
'quality': q(quality), 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
'has_drm': bool(fmt.get('drmFamilies')), 'has_drm': bool(fmt.get('drmFamilies')),
'tbr': tbr, 'tbr': tbr,
'url': fmt_url, 'url': fmt_url,
'width': int_or_none(fmt.get('width')), 'width': int_or_none(fmt.get('width')),
'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''), 'desc' if language_preference < -1 else '') or None,
'language_preference': language_preference, 'language_preference': language_preference,
# Strictly de-prioritize damaged and 3gp formats # Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None, 'preference': -10 if is_damaged else -2 if itag == '17' else None,

View file

@ -29,6 +29,8 @@
expand_path, expand_path,
format_field, format_field,
get_executable_path, get_executable_path,
get_system_config_dirs,
get_user_config_dirs,
join_nonempty, join_nonempty,
orderedSet_from_options, orderedSet_from_options,
remove_end, remove_end,
@ -38,71 +40,55 @@
def parseOpts(overrideArguments=None, ignore_config_files='if_override'): def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
PACKAGE_NAME = 'yt-dlp'
root = Config(create_parser()) root = Config(create_parser())
if ignore_config_files == 'if_override': if ignore_config_files == 'if_override':
ignore_config_files = overrideArguments is not None ignore_config_files = overrideArguments is not None
def _readUserConf(package_name, default=[]): def read_config(*paths):
# .config path = os.path.join(*paths)
xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') conf = Config.read_file(path, default=None)
userConfFile = os.path.join(xdg_config_home, package_name, 'config') if conf is not None:
if not os.path.isfile(userConfFile): return conf, path
userConfFile = os.path.join(xdg_config_home, '%s.conf' % package_name)
userConf = Config.read_file(userConfFile, default=None)
if userConf is not None:
return userConf, userConfFile
# appdata def _load_from_config_dirs(config_dirs):
appdata_dir = os.getenv('appdata') for config_dir in config_dirs:
if appdata_dir: head, tail = os.path.split(config_dir)
userConfFile = os.path.join(appdata_dir, package_name, 'config') assert tail == PACKAGE_NAME or config_dir == os.path.join(compat_expanduser('~'), f'.{PACKAGE_NAME}')
userConf = Config.read_file(userConfFile, default=None)
if userConf is None:
userConfFile += '.txt'
userConf = Config.read_file(userConfFile, default=None)
if userConf is not None:
return userConf, userConfFile
# home yield read_config(head, f'{PACKAGE_NAME}.conf')
userConfFile = os.path.join(compat_expanduser('~'), '%s.conf' % package_name) if tail.startswith('.'): # ~/.PACKAGE_NAME
userConf = Config.read_file(userConfFile, default=None) yield read_config(head, f'{PACKAGE_NAME}.conf.txt')
if userConf is None: yield read_config(config_dir, 'config')
userConfFile += '.txt' yield read_config(config_dir, 'config.txt')
userConf = Config.read_file(userConfFile, default=None)
if userConf is not None:
return userConf, userConfFile
return default, None def add_config(label, path=None, func=None):
def add_config(label, path, user=False):
""" Adds config and returns whether to continue """ """ Adds config and returns whether to continue """
if root.parse_known_args()[0].ignoreconfig: if root.parse_known_args()[0].ignoreconfig:
return False return False
# Multiple package names can be given here elif func:
# E.g. ('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for assert path is None
# the configuration file of any of these three packages args, current_path = next(
for package in ('yt-dlp',): filter(None, _load_from_config_dirs(func(PACKAGE_NAME))), (None, None))
if user:
args, current_path = _readUserConf(package, default=None)
else: else:
current_path = os.path.join(path, '%s.conf' % package) current_path = os.path.join(path, 'yt-dlp.conf')
args = Config.read_file(current_path, default=None) args = Config.read_file(current_path, default=None)
if args is not None: if args is not None:
root.append_config(args, current_path, label=label) root.append_config(args, current_path, label=label)
return True return True
return True
def load_configs(): def load_configs():
yield not ignore_config_files yield not ignore_config_files
yield add_config('Portable', get_executable_path()) yield add_config('Portable', get_executable_path())
yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip()) yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip())
yield add_config('User', None, user=True) yield add_config('User', func=get_user_config_dirs)
yield add_config('System', '/etc') yield add_config('System', func=get_system_config_dirs)
opts = optparse.Values({'verbose': True, 'print_help': False}) opts = optparse.Values({'verbose': True, 'print_help': False})
try: try:
try: try:
if overrideArguments: if overrideArguments is not None:
root.append_config(overrideArguments, label='Override') root.append_config(overrideArguments, label='Override')
else: else:
root.append_config(sys.argv[1:], label='Command-line') root.append_config(sys.argv[1:], label='Command-line')
@ -277,6 +263,20 @@ def _dict_from_options_callback(
out_dict[key] = out_dict.get(key, []) + [val] if append else val out_dict[key] = out_dict.get(key, []) + [val] if append else val
setattr(parser.values, option.dest, out_dict) setattr(parser.values, option.dest, out_dict)
def when_prefix(default):
return {
'default': {},
'type': 'str',
'action': 'callback',
'callback': _dict_from_options_callback,
'callback_kwargs': {
'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)),
'default_key': default,
'multiple_keys': False,
'append': True,
},
}
parser = _YoutubeDLOptionParser() parser = _YoutubeDLOptionParser()
alias_group = optparse.OptionGroup(parser, 'Aliases') alias_group = optparse.OptionGroup(parser, 'Aliases')
Formatter = string.Formatter() Formatter = string.Formatter()
@ -443,12 +443,14 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'allowed_values': { 'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': { }, 'aliases': {
'youtube-dl': ['all', '-multistreams'], 'youtube-dl': ['all', '-multistreams'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['no-external-downloader-progress'],
} }
}, help=( }, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc ' 'Options that can help keep compatibility with youtube-dl or youtube-dlc '
@ -493,6 +495,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
action='store_const', const='::', dest='source_address', action='store_const', const='::', dest='source_address',
help='Make all connections via IPv6', help='Make all connections via IPv6',
) )
network.add_option(
'--enable-file-urls', action='store_true',
dest='enable_file_urls', default=False,
help='Enable file:// URLs. This is disabled by default for security reasons.'
)
geo = optparse.OptionGroup(parser, 'Geo-restriction') geo = optparse.OptionGroup(parser, 'Geo-restriction')
geo.add_option( geo.add_option(
@ -559,8 +566,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--date', '--date',
metavar='DATE', dest='date', default=None, metavar='DATE', dest='date', default=None,
help=( help=(
'Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format ' 'Download only videos uploaded on this date. '
'[now|today|yesterday][-N[day|week|month|year]]. E.g. --date today-2weeks')) 'The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. '
'E.g. "--date today-2weeks" downloads only videos uploaded on the same day two weeks ago'))
selection.add_option( selection.add_option(
'--datebefore', '--datebefore',
metavar='DATE', dest='datebefore', default=None, metavar='DATE', dest='datebefore', default=None,
@ -875,11 +883,11 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'This option can be used multiple times to set the sleep for the different retry types, ' 'This option can be used multiple times to set the sleep for the different retry types, '
'e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20')) 'e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20'))
downloader.add_option( downloader.add_option(
'--skip-unavailable-fragments', '--no-abort-on-unavailable-fragment', '--skip-unavailable-fragments', '--no-abort-on-unavailable-fragments',
action='store_true', dest='skip_unavailable_fragments', default=True, action='store_true', dest='skip_unavailable_fragments', default=True,
help='Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragment)') help='Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragments)')
downloader.add_option( downloader.add_option(
'--abort-on-unavailable-fragment', '--no-skip-unavailable-fragments', '--abort-on-unavailable-fragments', '--no-skip-unavailable-fragments',
action='store_false', dest='skip_unavailable_fragments', action='store_false', dest='skip_unavailable_fragments',
help='Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)') help='Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
downloader.add_option( downloader.add_option(
@ -1086,28 +1094,16 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
help='Do not download the video but write all related files (Alias: --no-download)') help='Do not download the video but write all related files (Alias: --no-download)')
verbosity.add_option( verbosity.add_option(
'-O', '--print', '-O', '--print',
metavar='[WHEN:]TEMPLATE', dest='forceprint', default={}, type='str', metavar='[WHEN:]TEMPLATE', dest='forceprint', **when_prefix('video'),
action='callback', callback=_dict_from_options_callback, help=(
callback_kwargs={
'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)),
'default_key': 'video',
'multiple_keys': False,
'append': True,
}, help=(
'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". ' 'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). ' 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: video). '
'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. ' 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. '
'This option can be used multiple times')) 'This option can be used multiple times'))
verbosity.add_option( verbosity.add_option(
'--print-to-file', '--print-to-file',
metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2, metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', nargs=2, **when_prefix('video'),
action='callback', callback=_dict_from_options_callback, help=(
callback_kwargs={
'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)),
'default_key': 'video',
'multiple_keys': False,
'append': True,
}, help=(
'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. ' 'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. '
'FILE uses the same syntax as the output template. This option can be used multiple times')) 'FILE uses the same syntax as the output template. This option can be used multiple times'))
verbosity.add_option( verbosity.add_option(
@ -1584,14 +1580,16 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
help=optparse.SUPPRESS_HELP) help=optparse.SUPPRESS_HELP)
postproc.add_option( postproc.add_option(
'--parse-metadata', '--parse-metadata',
metavar='FROM:TO', dest='parse_metadata', action='append', metavar='[WHEN:]FROM:TO', dest='parse_metadata', **when_prefix('pre_process'),
help=( help=(
'Parse additional metadata like title/artist from other fields; ' 'Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details. '
'see "MODIFYING METADATA" for details')) 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)'))
postproc.add_option( postproc.add_option(
'--replace-in-metadata', '--replace-in-metadata',
dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3, dest='parse_metadata', metavar='[WHEN:]FIELDS REGEX REPLACE', nargs=3, **when_prefix('pre_process'),
help='Replace text in a metadata field using the given regex. This option can be used multiple times') help=(
'Replace text in a metadata field using the given regex. This option can be used multiple times. '
'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)'))
postproc.add_option( postproc.add_option(
'--xattrs', '--xattr', '--xattrs', '--xattr',
action='store_true', dest='xattrs', default=False, action='store_true', dest='xattrs', default=False,
@ -1629,19 +1627,13 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
help='Location of the ffmpeg binary; either the path to the binary or its containing directory') help='Location of the ffmpeg binary; either the path to the binary or its containing directory')
postproc.add_option( postproc.add_option(
'--exec', '--exec',
metavar='[WHEN:]CMD', dest='exec_cmd', default={}, type='str', metavar='[WHEN:]CMD', dest='exec_cmd', **when_prefix('after_move'),
action='callback', callback=_dict_from_options_callback, help=(
callback_kwargs={ 'Execute a command, optionally prefixed with when to execute it, separated by a ":". '
'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)), 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). '
'default_key': 'after_move',
'multiple_keys': False,
'append': True,
}, help=(
'Execute a command, optionally prefixed with when to execute it (after_move if unspecified), separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor. '
'Same syntax as the output template can be used to pass any field as arguments to the command. ' 'Same syntax as the output template can be used to pass any field as arguments to the command. '
'After download, an additional field "filepath" that contains the final path of the downloaded file ' 'After download, an additional field "filepath" that contains the final path of the downloaded file '
'is also available, and if no fields are passed, %(filepath)q is appended to the end of the command. ' 'is also available, and if no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'This option can be used multiple times')) 'This option can be used multiple times'))
postproc.add_option( postproc.add_option(
'--no-exec', '--no-exec',
@ -1714,7 +1706,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'ARGS are a semicolon ";" delimited list of NAME=VALUE. '
'The "when" argument determines when the postprocessor is invoked. ' 'The "when" argument determines when the postprocessor is invoked. '
'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), '
'"before_dl" (before each video download), "post_process" (after each video download; default), ' '"video" (after --format; before --print/--output), "before_dl" (before each video download), '
'"post_process" (after each video download; default), '
'"after_move" (after moving video file to it\'s final locations), ' '"after_move" (after moving video file to it\'s final locations), '
'"after_video" (after downloading and processing all formats of a video), ' '"after_video" (after downloading and processing all formats of a video), '
'or "playlist" (at end of playlist). ' 'or "playlist" (at end of playlist). '

170
yt_dlp/plugins.py Normal file
View file

@ -0,0 +1,170 @@
import contextlib
import importlib
import importlib.abc
import importlib.machinery
import importlib.util
import inspect
import itertools
import pkgutil
import sys
import traceback
import zipimport
from pathlib import Path
from zipfile import ZipFile
from .compat import functools # isort: split
from .utils import (
get_executable_path,
get_system_config_dirs,
get_user_config_dirs,
orderedSet,
write_string,
)
# Name of the namespace package under which plugin modules are looked up and imported
PACKAGE_NAME = 'yt_dlp_plugins'
# Folder name of the old `__init__.py`-based plugin layout, resolved next to the executable
COMPAT_PACKAGE_NAME = 'ytdlp_plugins'
class PluginLoader(importlib.abc.Loader):
    """No-op loader backing the virtual plugin namespace packages.

    The namespace packages carry no code of their own, so "executing" one
    of them does nothing.
    """

    def exec_module(self, module):
        """Leave `module` untouched; there is nothing to execute."""
        return
@functools.cache
def dirs_in_zip(archive):
    """Return the set of directories (as `Path` objects) inside a zip archive.

    Every ancestor of every archive member is included, so the relative root
    `Path('.')` is part of the result for any non-empty archive. Results are
    memoized per `archive`.

    A missing archive still raises `FileNotFoundError` (and is therefore not
    cached). Any other reason the archive cannot be read - the path is a
    directory, access is denied, the file is not a valid zip - yields an
    empty set instead of an exception.
    """
    from zipfile import BadZipFile  # local import: the module only imports ZipFile

    try:
        # Named `archive_file` so the builtin zip() is not shadowed
        with ZipFile(archive) as archive_file:
            return set(itertools.chain.from_iterable(
                Path(member).parents for member in archive_file.namelist()))
    except FileNotFoundError:
        raise
    except (OSError, BadZipFile):
        return set()
class PluginFinder(importlib.abc.MetaPathFinder):
    """
    This class provides one or multiple namespace packages.
    It searches in sys.path and yt-dlp config folders for
    the existing subdirectories (or zip archive members)
    from which the modules can be imported
    """

    def __init__(self, *packages):
        """Register the dotted package names to provide.

        Every parent package is provided as well: 'a.b' registers both
        'a' and 'a.b'.
        """
        self._zip_content_cache = {}  # not read by this class; kept as-is
        self.packages = set(itertools.chain.from_iterable(
            itertools.accumulate(name.split('.'), lambda a, b: '.'.join((a, b)))
            for name in packages))

    def search_locations(self, fullname):
        """Return the locations that contain the package `fullname`.

        The result is a list of path strings without duplicates, ordered by
        lookup priority: entries of the "plugins" folders in the yt-dlp
        config directories, then entries of the "yt-dlp-plugins" folders
        (executable directory, user and system config roots), then sys.path.
        """
        candidate_locations = []

        def _get_package_paths(*root_paths, containing_folder='plugins'):
            for config_dir in orderedSet(map(Path, root_paths), lazy=True):
                plugin_dir = config_dir / containing_folder
                if not plugin_dir.is_dir():
                    continue
                yield from plugin_dir.iterdir()

        # Load from yt-dlp config folders
        candidate_locations.extend(_get_package_paths(
            *get_user_config_dirs('yt-dlp'),
            *get_system_config_dirs('yt-dlp'),
            containing_folder='plugins'))

        # Load from yt-dlp-plugins folders
        candidate_locations.extend(_get_package_paths(
            get_executable_path(),
            *get_user_config_dirs(''),
            *get_system_config_dirs(''),
            containing_folder='yt-dlp-plugins'))

        candidate_locations.extend(map(Path, sys.path))  # PYTHONPATH

        parts = Path(*fullname.split('.'))
        # A dict preserves insertion order, so earlier candidates keep their
        # priority; a set would make the resulting search order arbitrary
        locations = {}
        for path in dict.fromkeys(candidate_locations):
            candidate = path / parts
            if candidate.is_dir():
                locations[str(candidate)] = None
            # Only inspect `path` itself when it is an archive file. Testing
            # `path.with_suffix(...)` would also match "foo" when only
            # "foo.zip" exists and then try to open the wrong path
            elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file():
                # The archive may disappear between the check and the read
                with contextlib.suppress(FileNotFoundError):
                    if parts in dirs_in_zip(path):
                        locations[str(candidate)] = None
        return list(locations)

    def find_spec(self, fullname, path=None, target=None):
        """Return a package spec for `fullname`, or None.

        None is returned when `fullname` is not one of the provided packages
        or when no location containing it exists.
        """
        if fullname not in self.packages:
            return None

        search_locations = self.search_locations(fullname)
        if not search_locations:
            return None

        spec = importlib.machinery.ModuleSpec(fullname, PluginLoader(), is_package=True)
        spec.submodule_search_locations = search_locations
        return spec

    def invalidate_caches(self):
        """Drop the cached archive listings and the already imported provided packages."""
        dirs_in_zip.cache_clear()
        for package in self.packages:
            if package in sys.modules:
                del sys.modules[package]
def directories():
    """Return the search locations of the plugin namespace package.

    An empty list is returned when no spec for the package can be found.
    """
    plugin_spec = importlib.util.find_spec(PACKAGE_NAME)
    if not plugin_spec:
        return []
    return plugin_spec.submodule_search_locations
def iter_modules(subpackage):
    """Yield `pkgutil` module infos for the modules of a plugin subpackage.

    The yielded module names are fully qualified. Nothing is yielded when
    the subpackage cannot be imported; a `ModuleNotFoundError` raised while
    listing the modules ends the iteration silently as well.
    """
    package_name = f'{PACKAGE_NAME}.{subpackage}'
    try:
        package = importlib.import_module(package_name)
        yield from pkgutil.iter_modules(path=package.__path__, prefix=f'{package_name}.')
    except ModuleNotFoundError:
        return
def load_module(module, module_name, suffix):
    """Collect the plugin classes exposed by `module`.

    Returns the `(name, class)` pairs found by `inspect.getmembers` for the
    classes whose name ends with `suffix` and does not start with an
    underscore, whose `__module__` starts with `module_name`, and which are
    listed in the module's `__all__` when it defines one.
    """
    def is_plugin_class(obj):
        if not inspect.isclass(obj):
            return False
        class_name = obj.__name__
        if not class_name.endswith(suffix):
            return False
        if not obj.__module__.startswith(module_name):
            return False
        if class_name.startswith('_'):
            return False
        # Without `__all__` every class passing the checks above is exported
        return class_name in getattr(module, '__all__', [class_name])

    return inspect.getmembers(module, is_plugin_class)
def _exec_registered_module(spec, module_name):
    """Create the module for `spec`, register it as `module_name` and execute it.

    If execution fails, the module is removed from sys.modules again before
    the exception propagates, so no half-initialized module stays importable.
    """
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        if sys.modules.get(module_name) is module:
            del sys.modules[module_name]
        raise
    return module


def load_plugins(name, suffix):
    """Import the plugin modules of one kind and collect their plugin classes.

    `name` is the plugin subpackage to scan (and the folder name used by the
    old `__init__.py` layout); `suffix` is the class name suffix that marks
    a plugin class. Returns a dict mapping class names to classes.

    A module that fails to import is reported through `write_string` and
    skipped; it is not left behind in sys.modules.
    """
    classes = {}

    for finder, module_name, _ in iter_modules(name):
        # Skip private modules and modules inside private packages
        if any(x.startswith('_') for x in module_name.split('.')):
            continue
        try:
            if sys.version_info < (3, 10) and isinstance(finder, zipimport.zipimporter):
                # zipimporter.load_module() is deprecated in 3.10 and removed in 3.12
                # The exec_module branch below is the replacement for >= 3.10
                # See: https://docs.python.org/3/library/zipimport.html#zipimport.zipimporter.exec_module
                module = finder.load_module(module_name)
            else:
                spec = finder.find_spec(module_name)
                module = _exec_registered_module(spec, module_name)
        except Exception:
            write_string(f'Error while importing module {module_name!r}\n{traceback.format_exc(limit=-1)}')
            continue
        classes.update(load_module(module, module_name, suffix))

    # Compat: old plugin system using __init__.py
    # Note: plugins imported this way do not show up in directories()
    # nor are considered part of the yt_dlp_plugins namespace package
    with contextlib.suppress(FileNotFoundError):
        spec = importlib.util.spec_from_file_location(
            name, Path(get_executable_path(), COMPAT_PACKAGE_NAME, name, '__init__.py'))
        # A missing __init__.py only surfaces while executing the module;
        # the helper makes sure no empty module stays registered in that case
        plugins = _exec_registered_module(spec, spec.name)
        classes.update(load_module(plugins, spec.name, suffix))

    return classes
# Put the finder at the front of sys.meta_path so that, at registration time,
# it is consulted before the other finders for the plugin namespace packages
sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.extractor', f'{PACKAGE_NAME}.postprocessor'))
# Names exported by `from ... import *`
__all__ = ['directories', 'load_plugins', 'PACKAGE_NAME', 'COMPAT_PACKAGE_NAME']

View file

@ -33,14 +33,15 @@
from .sponskrub import SponSkrubPP from .sponskrub import SponSkrubPP
from .sponsorblock import SponsorBlockPP from .sponsorblock import SponsorBlockPP
from .xattrpp import XAttrMetadataPP from .xattrpp import XAttrMetadataPP
from ..utils import load_plugins from ..plugins import load_plugins
_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals()) _PLUGIN_CLASSES = load_plugins('postprocessor', 'PP')
def get_postprocessor(key): def get_postprocessor(key):
return globals()[key + 'PP'] return globals()[key + 'PP']
globals().update(_PLUGIN_CLASSES)
__all__ = [name for name in globals().keys() if name.endswith('PP')] __all__ = [name for name in globals().keys() if name.endswith('PP')]
__all__.extend(('PostProcessor', 'FFmpegPostProcessor')) __all__.extend(('PostProcessor', 'FFmpegPostProcessor'))

View file

@ -44,6 +44,7 @@
'ts': 'mpegts', 'ts': 'mpegts',
'wma': 'asf', 'wma': 'asf',
'wmv': 'asf', 'wmv': 'asf',
'weba': 'webm',
'vtt': 'webvtt', 'vtt': 'webvtt',
} }
ACODECS = { ACODECS = {
@ -407,7 +408,7 @@ def concat_files(self, in_files, out_file, concat_opts=None):
""" """
concat_file = f'{out_file}.concat' concat_file = f'{out_file}.concat'
self.write_debug(f'Writing concat spec to {concat_file}') self.write_debug(f'Writing concat spec to {concat_file}')
with open(concat_file, 'wt', encoding='utf-8') as f: with open(concat_file, 'w', encoding='utf-8') as f:
f.writelines(self._concat_spec(in_files, concat_opts)) f.writelines(self._concat_spec(in_files, concat_opts))
out_flags = list(self.stream_copy_opts(ext=determine_ext(out_file))) out_flags = list(self.stream_copy_opts(ext=determine_ext(out_file)))
@ -711,7 +712,7 @@ def run(self, info):
@staticmethod @staticmethod
def _get_chapter_opts(chapters, metadata_filename): def _get_chapter_opts(chapters, metadata_filename):
with open(metadata_filename, 'wt', encoding='utf-8') as f: with open(metadata_filename, 'w', encoding='utf-8') as f:
def ffmpeg_escape(text): def ffmpeg_escape(text):
return re.sub(r'([\\=;#\n])', r'\\\1', text) return re.sub(r'([\\=;#\n])', r'\\\1', text)
@ -981,7 +982,7 @@ def run(self, info):
with open(dfxp_file, 'rb') as f: with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read()) srt_data = dfxp2srt(f.read())
with open(srt_file, 'wt', encoding='utf-8') as f: with open(srt_file, 'w', encoding='utf-8') as f:
f.write(srt_data) f.write(srt_data)
old_file = srt_file old_file = srt_file

View file

@ -264,7 +264,8 @@ def update(self):
self._report_error('Unable to overwrite current version') self._report_error('Unable to overwrite current version')
return os.rename(old_filename, self.filename) return os.rename(old_filename, self.filename)
if detect_variant() in ('win32_exe', 'py2exe'): variant = detect_variant()
if variant.startswith('win') or variant == 'py2exe':
atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"', atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"',
shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
elif old_filename: elif old_filename:

View file

@ -18,7 +18,6 @@
import html.parser import html.parser
import http.client import http.client
import http.cookiejar import http.cookiejar
import importlib.util
import inspect import inspect
import io import io
import itertools import itertools
@ -2721,8 +2720,10 @@ def _get_exe_version_output(exe, args):
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if yt-dlp is run in the background. # SIGTTOU if yt-dlp is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True, stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True,
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
if ret:
return None
except OSError: except OSError:
return False return False
return stdout return stdout
@ -2740,11 +2741,15 @@ def detect_exe_version(output, version_re=None, unrecognized='present'):
def get_exe_version(exe, args=['--version'], def get_exe_version(exe, args=['--version'],
version_re=None, unrecognized='present'): version_re=None, unrecognized=('present', 'broken')):
""" Returns the version of the specified executable, """ Returns the version of the specified executable,
or False if the executable is not present """ or False if the executable is not present """
unrecognized = variadic(unrecognized)
assert len(unrecognized) in (1, 2)
out = _get_exe_version_output(exe, args) out = _get_exe_version_output(exe, args)
return detect_exe_version(out, version_re, unrecognized) if out else False if out is None:
return unrecognized[-1]
return out and detect_exe_version(out, version_re, unrecognized[0])
def frange(start=0, stop=None, step=1): def frange(start=0, stop=None, step=1):
@ -3360,7 +3365,13 @@ def fix_kv(m):
return f'"{i}":' if v.endswith(':') else str(i) return f'"{i}":' if v.endswith(':') else str(i)
if v in vars: if v in vars:
try:
if not strict:
json.loads(vars[v])
except json.JSONDecodeError:
return json.dumps(vars[v]) return json.dumps(vars[v])
else:
return vars[v]
if not strict: if not strict:
return f'"{v}"' return f'"{v}"'
@ -3395,7 +3406,7 @@ def q(qid):
return q return q
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist') POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
DEFAULT_OUTTMPL = { DEFAULT_OUTTMPL = {
@ -3480,67 +3491,93 @@ def error_to_str(err):
return f'{type(err).__name__}: {err}' return f'{type(err).__name__}: {err}'
def mimetype2ext(mt): def mimetype2ext(mt, default=NO_DEFAULT):
if mt is None: if not isinstance(mt, str):
if default is not NO_DEFAULT:
return default
return None return None
mt, _, params = mt.partition(';') MAP = {
mt = mt.strip() # video
FULL_MAP = {
'audio/mp4': 'm4a',
# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
# it's the most popular one
'audio/mpeg': 'mp3',
'audio/x-wav': 'wav',
'audio/wav': 'wav',
'audio/wave': 'wav',
}
ext = FULL_MAP.get(mt)
if ext is not None:
return ext
SUBTYPE_MAP = {
'3gpp': '3gp', '3gpp': '3gp',
'smptett+xml': 'tt', 'mp2t': 'ts',
'ttaf+xml': 'dfxp', 'mp4': 'mp4',
'ttml+xml': 'ttml', 'mpeg': 'mpeg',
'x-flv': 'flv',
'x-mp4-fragmented': 'mp4',
'x-ms-sami': 'sami',
'x-ms-wmv': 'wmv',
'mpegurl': 'm3u8', 'mpegurl': 'm3u8',
'x-mpegurl': 'm3u8', 'quicktime': 'mov',
'vnd.apple.mpegurl': 'm3u8', 'webm': 'webm',
'vp9': 'vp9',
'x-flv': 'flv',
'x-m4v': 'm4v',
'x-matroska': 'mkv',
'x-mng': 'mng',
'x-mp4-fragmented': 'mp4',
'x-ms-asf': 'asf',
'x-ms-wmv': 'wmv',
'x-msvideo': 'avi',
# application (streaming playlists)
'dash+xml': 'mpd', 'dash+xml': 'mpd',
'f4m+xml': 'f4m', 'f4m+xml': 'f4m',
'hds+xml': 'f4m', 'hds+xml': 'f4m',
'vnd.apple.mpegurl': 'm3u8',
'vnd.ms-sstr+xml': 'ism', 'vnd.ms-sstr+xml': 'ism',
'quicktime': 'mov', 'x-mpegurl': 'm3u8',
'mp2t': 'ts',
# audio
'audio/mp4': 'm4a',
# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
# Using .mp3 as it's the most popular one
'audio/mpeg': 'mp3',
'audio/webm': 'webm',
'audio/x-matroska': 'mka',
'audio/x-mpegurl': 'm3u',
'midi': 'mid',
'ogg': 'ogg',
'wav': 'wav',
'wave': 'wav',
'x-aac': 'aac',
'x-flac': 'flac',
'x-m4a': 'm4a',
'x-realaudio': 'ra',
'x-wav': 'wav', 'x-wav': 'wav',
'filmstrip+json': 'fs',
# image
'avif': 'avif',
'bmp': 'bmp',
'gif': 'gif',
'jpeg': 'jpg',
'png': 'png',
'svg+xml': 'svg', 'svg+xml': 'svg',
} 'tiff': 'tif',
'vnd.wap.wbmp': 'wbmp',
'webp': 'webp',
'x-icon': 'ico',
'x-jng': 'jng',
'x-ms-bmp': 'bmp',
_, _, subtype = mt.rpartition('/') # caption
ext = SUBTYPE_MAP.get(subtype.lower()) 'filmstrip+json': 'fs',
if ext is not None: 'smptett+xml': 'tt',
return ext 'ttaf+xml': 'dfxp',
'ttml+xml': 'ttml',
'x-ms-sami': 'sami',
SUFFIX_MAP = { # misc
'gzip': 'gz',
'json': 'json', 'json': 'json',
'xml': 'xml', 'xml': 'xml',
'zip': 'zip', 'zip': 'zip',
'gzip': 'gz',
} }
_, _, suffix = subtype.partition('+') mimetype = mt.partition(';')[0].strip().lower()
ext = SUFFIX_MAP.get(suffix) _, _, subtype = mimetype.rpartition('/')
if ext is not None:
return ext
ext = traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
if ext:
return ext
elif default is not NO_DEFAULT:
return default
return subtype.replace('+', '.') return subtype.replace('+', '.')
@ -3624,7 +3661,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
COMPATIBLE_EXTS = ( COMPATIBLE_EXTS = (
{'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'}, {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
{'webm'}, {'webm', 'weba'},
) )
for ext in preferences or vexts: for ext in preferences or vexts:
current_exts = {ext, *vexts, *aexts} current_exts = {ext, *vexts, *aexts}
@ -3634,7 +3671,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
return 'mkv' if allow_mkv else preferences[-1] return 'mkv' if allow_mkv else preferences[-1]
def urlhandle_detect_ext(url_handle): def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
getheader = url_handle.headers.get getheader = url_handle.headers.get
cd = getheader('Content-Disposition') cd = getheader('Content-Disposition')
@ -3645,7 +3682,13 @@ def urlhandle_detect_ext(url_handle):
if e: if e:
return e return e
return mimetype2ext(getheader('Content-Type')) meta_ext = getheader('x-amz-meta-name')
if meta_ext:
e = meta_ext.rpartition('.')[2]
if e:
return e
return mimetype2ext(getheader('Content-Type'), default=default)
def encode_data_uri(data, mime_type): def encode_data_uri(data, mime_type):
@ -5200,6 +5243,15 @@ def random_birthday(year_field, month_field, day_field):
} }
def find_available_port(interface=''):
    """Return a port number the OS handed out for `interface`, or None.

    A temporary socket is bound to port 0 so that the OS picks the port;
    the socket is closed again before returning. None is returned when
    creating or binding the socket fails with an OSError.
    """
    try:
        with socket.socket() as probe:
            probe.bind((interface, 0))
            bound_address = probe.getsockname()
    except OSError:
        return None
    return bound_address[1]
# Templates for internet shortcut files, which are plain text files. # Templates for internet shortcut files, which are plain text files.
DOT_URL_LINK_TEMPLATE = '''\ DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut] [InternetShortcut]
@ -5334,22 +5386,23 @@ def get_executable_path():
return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1])) return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
def load_plugins(name, suffix, namespace): def get_user_config_dirs(package_name):
classes = {} # .config (e.g. ~/.config/package_name)
with contextlib.suppress(FileNotFoundError): xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
plugins_spec = importlib.util.spec_from_file_location( yield os.path.join(xdg_config_home, package_name)
name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
plugins = importlib.util.module_from_spec(plugins_spec) # appdata (%APPDATA%/package_name)
sys.modules[plugins_spec.name] = plugins appdata_dir = os.getenv('appdata')
plugins_spec.loader.exec_module(plugins) if appdata_dir:
for name in dir(plugins): yield os.path.join(appdata_dir, package_name)
if name in namespace:
continue # home (~/.package_name)
if not name.endswith(suffix): yield os.path.join(compat_expanduser('~'), f'.{package_name}')
continue
klass = getattr(plugins, name)
classes[name] = namespace[name] = klass def get_system_config_dirs(package_name):
return classes # /etc/package_name
yield os.path.join('/etc', package_name)
def traverse_obj( def traverse_obj(
@ -5371,7 +5424,7 @@ def traverse_obj(
The keys in the path can be one of: The keys in the path can be one of:
- `None`: Return the current object. - `None`: Return the current object.
- `str`/`int`: Return `obj[key]`. For `re.Match, return `obj.group(key)`. - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
- `slice`: Branch out and return all values in `obj[key]`. - `slice`: Branch out and return all values in `obj[key]`.
- `Ellipsis`: Branch out and return a list of all values. - `Ellipsis`: Branch out and return a list of all values.
- `tuple`/`list`: Branch out and return a list of all matching values. - `tuple`/`list`: Branch out and return a list of all matching values.
@ -5592,7 +5645,6 @@ def windows_enable_vt_mode():
dll = ctypes.WinDLL('kernel32', use_last_error=False) dll = ctypes.WinDLL('kernel32', use_last_error=False)
handle = os.open('CONOUT$', os.O_RDWR) handle = os.open('CONOUT$', os.O_RDWR)
try: try:
h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle)) h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
dw_original_mode = ctypes.wintypes.DWORD() dw_original_mode = ctypes.wintypes.DWORD()
@ -5604,14 +5656,12 @@ def windows_enable_vt_mode():
dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
if not success: if not success:
raise Exception('SetConsoleMode failed') raise Exception('SetConsoleMode failed')
except Exception as e: finally:
write_string(f'WARNING: Cannot enable VT mode - {e}') os.close(handle)
else:
global WINDOWS_VT_MODE global WINDOWS_VT_MODE
WINDOWS_VT_MODE = True WINDOWS_VT_MODE = True
supports_terminal_sequences.cache_clear() supports_terminal_sequences.cache_clear()
finally:
os.close(handle)
_terminal_sequences_re = re.compile('\033\\[[^m]+m') _terminal_sequences_re = re.compile('\033\\[[^m]+m')
@ -5924,7 +5974,7 @@ def items_(self):
common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'), audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
thumbnails=('jpg', 'png', 'webp'), thumbnails=('jpg', 'png', 'webp'),
storyboards=('mhtml', ), storyboards=('mhtml', ),
subtitles=('srt', 'vtt', 'ass', 'lrc'), subtitles=('srt', 'vtt', 'ass', 'lrc'),
@ -6056,9 +6106,9 @@ class FormatSorter:
'vext': {'type': 'ordered', 'field': 'video_ext', 'vext': {'type': 'ordered', 'field': 'video_ext',
'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'), 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')}, 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
'aext': {'type': 'ordered', 'field': 'audio_ext', 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')}, 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
'field': ('vcodec', 'acodec'), 'field': ('vcodec', 'acodec'),
@ -6329,3 +6379,10 @@ def calculate_preference(self, format):
# Deprecated # Deprecated
has_certifi = bool(certifi) has_certifi = bool(certifi)
has_websockets = bool(websockets) has_websockets = bool(websockets)
def load_plugins(name, suffix, namespace):
    """Load plugin classes via `.plugins.load_plugins` and add them to `namespace`.

    Keeps the old three-argument signature: the loaded classes are merged
    into the `namespace` mapping and also returned.
    """
    from .plugins import load_plugins as _load_plugins

    plugin_classes = _load_plugins(name, suffix)
    namespace.update(plugin_classes)
    return plugin_classes

View file

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py # Autogenerated by devscripts/update-version.py
__version__ = '2022.11.11' __version__ = '2023.01.06'
RELEASE_GIT_HEAD = '8b644025b' RELEASE_GIT_HEAD = '6becd2508'
VARIANT = None VARIANT = None

View file

@ -1,4 +0,0 @@
# flake8: noqa: F401
# The imported name must end in "IE"
from .sample import SamplePluginIE

View file

@ -1,14 +0,0 @@
# ⚠ Don't use relative imports
from yt_dlp.extractor.common import InfoExtractor
# Instructions on making extractors can be found at:
# 🔗 https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site
class SamplePluginIE(InfoExtractor):
    """Sample extractor plugin that only reports the URL it was given."""

    _VALID_URL = r'^sampleplugin:'
    IE_DESC = False
    _WORKING = False

    def _real_extract(self, url):
        """Print a confirmation for `url`; no info dict is returned."""
        message = 'URL "%s" successfully captured' % url
        self.to_screen(message)

View file

@ -1,4 +0,0 @@
# flake8: noqa: F401
# The imported name must end in "PP" and is the name to be used in --use-postprocessor
from .sample import SamplePluginPP

View file

@ -1,26 +0,0 @@
# ⚠ Don't use relative imports
from yt_dlp.postprocessor.common import PostProcessor
# See the docstring of yt_dlp.postprocessor.common.PostProcessor
class SamplePluginPP(PostProcessor):
    """Sample postprocessor plugin that reports what it was invoked on."""

    def __init__(self, downloader=None, **kwargs):
        # ⚠ Only kwargs can be passed from the CLI, and all argument values will be string
        # Also, "downloader", "when" and "key" are reserved names
        super().__init__(downloader)
        self._kwargs = kwargs

    # See docstring of yt_dlp.postprocessor.common.PostProcessor.run
    def run(self, info):
        """Print one line describing `info` and return it unchanged.

        Returns `([], info)`: an empty list of files to delete and the info dict.
        """
        if info.get('_type', 'video') != 'video':  # PP was called for playlist
            message = f'Post-processing playlist {info.get("id")!r} with {self._kwargs}'
        elif info.get('filepath'):  # PP was called after download (default)
            filepath = info.get('filepath')
            message = f'Post-processed {filepath!r} with {self._kwargs}'
        elif info.get('requested_downloads'):  # PP was called after_video
            filepaths = [f.get('filepath') for f in info.get('requested_downloads')]
            message = f'Post-processed {filepaths!r} with {self._kwargs}'
        else:  # PP was called before actual download
            filepath = info.get('_filename')
            message = f'Pre-processed {filepath!r} with {self._kwargs}'
        self.to_screen(message)
        return [], info  # return list_of_files_to_delete, info_dict