diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index dd1b33dde..f0fc71d57 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting that yt-dlp is broken on a **supported** site required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -64,7 +64,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -72,8 +72,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 4f4378924..ac9a72a1c 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -76,7 +76,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -84,8 +84,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 05b4dd23b..577e4d491 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -72,7 +72,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -80,8 +80,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 880f1014c..9529c1bd6 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,8 +65,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index acb11795f..b17a6e046 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates required: true @@ -53,7 +53,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -61,7 +61,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index a2563e975..5345e8917 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.09.24** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates required: true @@ -59,7 +59,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.09.24 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -67,7 +67,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.07.06, Current version: 2023.07.06 - yt-dlp is up to date (2023.07.06) + Latest version: 2023.09.24, Current version: 2023.09.24 + yt-dlp is up to date (2023.09.24) render: shell diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8587fe92..90e7faf7c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -217,7 +217,7 @@ After you have ensured this site is distributing its content legally, you can fo 1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. 1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` 1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. -1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. +1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 yt_dlp/extractor/yourextractor.py @@ -251,7 +251,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: +For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - `title` (media title) @@ -696,7 +696,7 @@ formats = [ ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -704,7 +704,7 @@ Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions. +Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions. #### Examples diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6b9b9f470..72b9584ec 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -467,3 +467,39 @@ rdamas RfadnjdExt urectanc nao20010128nao/Lesmiscore +04-pasha-04 +aaruni96 +aky-01 +AmirAflak +ApoorvShah111 +at-wat +davinkevin +demon071 +denhotte +FinnRG +fireattack +Frankgoji +GD-Slime +hatsomatt +ifan-t +kshitiz305 +kylegustavo +mabdelfattah +nathantouze +niemands +Rajeshwaran2001 +RedDeffender +Rohxn16 +sb0stn +SevenLives +simon300000 +snixon +soundchaser128 +szabyg +trainman261 +trislee +wader +Yalab7 +zhallgato +zhong-yiyu +Zprokkel diff --git a/Changelog.md b/Changelog.md index 32cdaca2a..04511927f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,202 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2023.09.24 + +#### Important changes +- **The minimum *recommended* Python version has been raised to 3.8** +Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803) +- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg) + - The shell escape function is now using `""` instead of `\"`. + - `utils.Popen` has been patched to properly quote commands. + +#### Core changes +- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan) +- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K) +- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz) +- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K) +- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K) +- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan) +- **compat** + - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd)) + - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) + - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan) +- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly) +- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly) +- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan) +- **utils** + - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan) + - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly) + - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan) + - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah) + - `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move) + +#### Extractor changes +- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan) +- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move) +- **abematv** + - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives) + - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan) +- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg) +- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos) +- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly) +- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick) +- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305) +- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317) +- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick) +- **bilibili** + - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt) + - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22) + - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime) +- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K) +- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0) +- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld) +- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261) +- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261) +- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317) +- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K) +- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt) +- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly)) +- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo) +- **facebook** + - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1) + - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack) + - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071) +- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20) +- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04) +- **generic** + - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly) + - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan) +- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001) + - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly) + - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly) +- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7) +- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01) +- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move) +- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly) +- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000) +- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly) +- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn) +- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16) +- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien) +- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato) +- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly) +- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317) +- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317) +- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak) +- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly) +- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly) +- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png) +- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly) +- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111) +- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly)) +- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move) +- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly) +- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon) +- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG) +- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji) +- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands) +- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu) +- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K) +- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0) +- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000) +- **reddit** + - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly) + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly) +- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly) +- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128) +- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee) +- **s4c** + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t) + - [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t) +- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt) +- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly) +- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera) +- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader) +- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317) +- **tiktok** + - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly) + - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly) +- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly) +- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli) +- **twitcasting** + - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt) + - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat) +- **twitter** + - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly) + - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly) + - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly) + - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan) + - spaces + - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly) + - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly) +- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan) +- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt) +- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel) +- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin) +- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg) +- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly) +- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt) +- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev) +- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa) +- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz) + - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz) + - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz) +- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly) +- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **external** + - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly) + - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly) + +#### Postprocessor changes +- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic) + +#### Networking changes +- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan) +- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz) +- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz) +- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly) +- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly) +- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz) +- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz) +- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler** + - urllib + - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz) + - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a)) + +#### Misc. changes +- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan) +- **cleanup** + - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) + - Miscellaneous + - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan) + - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT) + - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K) +- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K) +- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K) +- **test** + - [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan) + - download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat) + ### 2023.07.06 #### Important changes diff --git a/README.md b/README.md index c7b73f4fd..a0b69c9a1 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API @@ -1800,7 +1800,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -1809,6 +1809,7 @@ The following extractors use this feature: * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests +* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) @@ -1845,6 +1846,9 @@ The following extractors use this feature: * `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265` * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` +#### niconicochannelplus +* `max_comments`: Maximum number of comments to extract - default is `120` + #### tiktok * `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com` * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` @@ -1865,6 +1869,9 @@ The following extractors use this feature: #### nhkradirulive (NHK らじる★らじる LIVE) * `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo` +#### nflplusreplay +* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e7f453acf..fe0c82c66 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -88,5 +88,15 @@ "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", "short": "[rh:urllib] Simplify gzip decoding (#7611)", "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", + "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" + }, + { + "action": "add", + "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391", + "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands." } ] diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index ac68dcd19..9ff65db14 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -260,7 +260,7 @@ class CommitRange: AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) MESSAGE_RE = re.compile(r''' (?:\[(?P[^\]]+)\]\ )? - (?:(?P`?[^:`]+`?): )? + (?:(?P`?[\w.-]+`?): )? (?P.+?) (?:\ \((?P\#\d+(?:,\ \#\d+)*)\))? ''', re.VERBOSE | re.DOTALL) diff --git a/supportedsites.md b/supportedsites.md index 379d28ef3..620e0f305 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -77,7 +77,7 @@ - **AnimalPlanet** - **ant1newsgr:article**: ant1news.gr articles - **ant1newsgr:embed**: ant1news.gr embedded videos - - **ant1newsgr:watch**: ant1news.gr videos + - **antenna:watch**: antenna.gr and ant1news.gr videos - **Anvato** - **aol.com**: Yahoo screen and movies - **APA** @@ -98,8 +98,6 @@ - **ArteTVCategory** - **ArteTVEmbed** - **ArteTVPlaylist** - - **AsianCrush** - - **AsianCrushPlaylist** - **AtresPlayer**: [*atresplayer*](## "netrc machine") - **AtScaleConfEvent** - **ATTTechChannel** @@ -118,6 +116,7 @@ - **awaan:live** - **awaan:season** - **awaan:video** + - **axs.tv** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 - **BanBye** @@ -162,11 +161,16 @@ - **BilibiliAudioAlbum** - **BiliBiliBangumi** - **BiliBiliBangumiMedia** + - **BiliBiliBangumiSeason** + - **BilibiliCollectionList** + - **BilibiliFavoritesList** - **BiliBiliPlayer** + - **BilibiliPlaylist** - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix + - **BilibiliSeriesList** - **BilibiliSpaceAudio** - - **BilibiliSpacePlaylist** - **BilibiliSpaceVideo** + - **BilibiliWatchlater** - **BiliIntl**: [*biliintl*](## "netrc machine") - **biliIntl:series**: [*biliintl*](## "netrc machine") - **BiliLive** @@ -201,6 +205,8 @@ - **BreitBart** - **brightcove:legacy** - **brightcove:new** + - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org + - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen @@ -220,14 +226,17 @@ - **Camsoda** - **CamtasiaEmbed** - **CamWithHer** + - **Canal1** - **CanalAlpha** - **canalc2.tv** - **Canalplus**: mycanal.fr and piwiplus.fr + - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") - **CarambaTV** - **CarambaTVPage** - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** + - **cbc.ca:​player:playlist** - **CBS** - **CBSInteractive** - **CBSLocal** @@ -257,6 +266,8 @@ - **Cinchcast** - **Cinemax** - **CinetecaMilano** + - **Cineverse** + - **CineverseDetails** - **CiscoLiveSearch** - **CiscoLiveSession** - **ciscowebex**: Cisco Webex @@ -365,7 +376,7 @@ - **Dotsub** - **Douyin** - **DouyuShow** - - **DouyuTV**: 斗鱼 + - **DouyuTV**: 斗鱼直播 - **DPlay** - **DRBonanza** - **Drooble** @@ -408,6 +419,7 @@ - **Engadget** - **Epicon** - **EpiconSeries** + - **eplus:inbound**: e+ (イープラス) overseas - **Epoch** - **Eporner** - **EroProfile**: [*eroprofile*](## "netrc machine") @@ -732,6 +744,7 @@ - **lynda**: [*lynda*](## "netrc machine") lynda.com videos - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses - **m6** + - **MagellanTV** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru - **mailru:music**: Музыка@Mail.Ru @@ -812,6 +825,7 @@ - **Mofosex** - **MofosexEmbed** - **Mojvideo** + - **Monstercat** - **MonsterSirenHypergryphMusic** - **Morningstar**: morningstar.com - **Motherless** @@ -840,6 +854,7 @@ - **MujRozhlas** - **Murrtube** - **MurrtubeUser**: Murrtube user profile + - **MuseAI** - **MuseScore** - **MusicdexAlbum** - **MusicdexArtist** @@ -944,6 +959,9 @@ - **niconico:playlist** - **niconico:series** - **niconico:tag**: NicoNico video tag URLs + - **NiconicoChannelPlus**: ニコニコチャンネルプラス + - **NiconicoChannelPlus:​channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives + - **NiconicoChannelPlus:​channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos - **NiconicoUser** - **nicovideo:search**: Nico video search; "nicosearch:" prefix - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix @@ -1046,6 +1064,7 @@ - **Patreon** - **PatreonCampaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) + - **PBSKids** - **PearVideo** - **PeekVids** - **peer.tv** @@ -1062,6 +1081,7 @@ - **phoenix.de** - **Photobucket** - **Piapro**: [*piapro*](## "netrc machine") + - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM - **Picarto** - **PicartoVod** - **Piksel** @@ -1105,6 +1125,7 @@ - **polskieradio:​podcast:list** - **Popcorntimes** - **PopcornTV** + - **Pornbox** - **PornCom** - **PornerBros** - **Pornez** @@ -1121,7 +1142,6 @@ - **PornTop** - **PornTube** - **Pr0gramm** - - **Pr0grammStatic** - **PrankCast** - **PremiershipRugby** - **PressTV** @@ -1156,6 +1176,10 @@ - **radiocanada** - **radiocanada:audiovideo** - **radiofrance** + - **RadioFranceLive** + - **RadioFrancePodcast** + - **RadioFranceProfile** + - **RadioFranceProgramSchedule** - **RadioJavan** - **radiokapital** - **radiokapital:show** @@ -1177,6 +1201,7 @@ - **RayWenderlichCourse** - **RbgTum** - **RbgTumCourse** + - **RbgTumNewCourse** - **RBMARadio** - **RCS** - **RCSEmbeds** @@ -1259,6 +1284,8 @@ - **Ruutu** - **Ruv** - **ruv.is:spila** + - **S4C** + - **S4CSeries** - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video - **safari:api**: [*safari*](## "netrc machine") - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses @@ -1325,6 +1352,7 @@ - **Smotrim** - **Snotr** - **Sohu** + - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** - **soundcloud**: [*soundcloud*](## "netrc machine") @@ -1378,7 +1406,6 @@ - **StoryFireSeries** - **StoryFireUser** - **Streamable** - - **Streamanity** - **streamcloud.eu** - **StreamCZ** - **StreamFF** @@ -1403,6 +1430,9 @@ - **Tagesschau** - **Tass** - **TBS** + - **TBSJPEpisode** + - **TBSJPPlaylist** + - **TBSJPProgram** - **TDSLifeway** - **Teachable**: [*teachable*](## "netrc machine") - **TeachableCourse**: [*teachable*](## "netrc machine") @@ -1702,7 +1732,6 @@ - **wdr:mobile**: (**Currently broken**) - **WDRElefant** - **WDRPage** - - **web.archive:vlive**: web.archive.org saved vlive videos - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **Webcamerapl** - **Webcaster** @@ -1710,7 +1739,8 @@ - **WebOfStories** - **WebOfStoriesPlaylist** - **Weibo** - - **WeiboMobile** + - **WeiboUser** + - **WeiboVideo** - **WeiqiTV**: WQTV - **wetv:episode** - **WeTvSeries** @@ -1726,6 +1756,7 @@ - **Whyp** - **wikimedia.org** - **Willow** + - **Wimbledon** - **WimTV** - **WinSportsVideo** - **Wistia** diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 3cfb61fb2..0cf130db0 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -631,7 +631,6 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(test_dict['playlist'], 'funny videos') outtmpl_info = { - 'id': '1234', 'id': '1234', 'ext': 'mp4', 'width': None, @@ -785,9 +784,9 @@ class TestYoutubeDL(unittest.TestCase): test('%(title4)#S', 'foo_bar_test') test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) if compat_os_name == 'nt': - test('%(title4)q', ('"foo \\"bar\\" test"', ""foo ⧹"bar⧹" test"")) - test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '"id 1" "id 2" "id 3"')) - test('%(formats.0.id)#q', ('"id 1"', '"id 1"')) + test('%(title4)q', ('"foo ""bar"" test"', None)) + test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) + test('%(formats.0.id)#q', ('"id 1"', None)) else: test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\'')) test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") diff --git a/test/test_download.py b/test/test_download.py index 6f00a4ded..253079249 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -31,6 +31,7 @@ from yt_dlp.utils import ( DownloadError, ExtractorError, UnavailableVideoError, + YoutubeDLError, format_bytes, join_nonempty, ) @@ -100,6 +101,8 @@ def generator(test_case, tname): print_skipping('IE marked as not _WORKING') for tc in test_cases: + if tc.get('expected_exception'): + continue info_dict = tc.get('info_dict', {}) params = tc.get('params', {}) if not info_dict.get('id'): @@ -139,6 +142,17 @@ def generator(test_case, tname): res_dict = None + def match_exception(err): + expected_exception = test_case.get('expected_exception') + if not expected_exception: + return False + if err.__class__.__name__ == expected_exception: + return True + for exc in err.exc_info: + if exc.__class__.__name__ == expected_exception: + return True + return False + def try_rm_tcs_files(tcs=None): if tcs is None: tcs = test_cases @@ -161,6 +175,8 @@ def generator(test_case, tname): except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503): + if match_exception(err): + return err.msg = f'{getattr(err, "msg", err)} ({tname})' raise @@ -171,6 +187,10 @@ def generator(test_case, tname): print(f'Retrying: {try_num} failed tries\n\n##########\n\n') try_num += 1 + except YoutubeDLError as err: + if match_exception(err): + return + raise else: break diff --git a/test/test_execution.py b/test/test_execution.py index 7a9e800b6..fb2f6e2e9 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -45,6 +45,9 @@ class TestExecution(unittest.TestCase): self.assertTrue(os.path.exists(LAZY_EXTRACTORS)) _, stderr = self.run_yt_dlp(opts=('-s', 'test:')) + # `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated python versions + if stderr and stderr.startswith('Deprecated Feature: Support for Python'): + stderr = '' self.assertFalse(stderr) subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL) diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index dbf656090..419aae1e4 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -269,14 +269,14 @@ class TestNetworkingExceptions: assert not response.closed def test_incomplete_read_error(self): - error = IncompleteRead(b'test', 3, cause='test') + error = IncompleteRead(4, 3, cause='test') assert isinstance(error, IncompleteRead) assert repr(error) == '' assert str(error) == error.msg == '4 bytes read, 3 more expected' - assert error.partial == b'test' + assert error.partial == 4 assert error.expected == 3 assert error.cause == 'test' - error = IncompleteRead(b'aaa') + error = IncompleteRead(3) assert repr(error) == '' assert str(error) == '3 bytes read' diff --git a/test/test_utils.py b/test/test_utils.py index 91e3ffd39..fd612ff86 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -14,6 +14,7 @@ import contextlib import io import itertools import json +import subprocess import xml.etree.ElementTree from yt_dlp.compat import ( @@ -28,6 +29,7 @@ from yt_dlp.utils import ( InAdvancePagedList, LazyList, OnDemandPagedList, + Popen, age_restricted, args_to_str, base_url, @@ -1218,6 +1220,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""') self.assertEqual(js_to_json('`${name}`', {}), '"name"') + def test_js_to_json_map_array_constructors(self): + self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5}) + self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) + self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) + self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) + def test_extract_attributes(self): self.assertEqual(extract_attributes(''), {'x': 'y'}) self.assertEqual(extract_attributes(""), {'x': 'y'}) @@ -2382,6 +2390,21 @@ Line 1 assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') + @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + def test_Popen_windows_escaping(self): + def run_shell(args): + stdout, stderr, error = Popen.run( + args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + assert not stderr + assert not error + return stdout + + # Test escaping + assert run_shell(['echo', 'test"&']) == '"test""&"\n' + # Test if delayed expansion is disabled + assert run_shell(['echo', '^!']) == '"^!"\n' + assert run_shell('echo "^!"') == '"^!"\n' + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1feed3052..f322b12a2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -60,7 +60,7 @@ from .postprocessor import ( get_postprocessor, ) from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping -from .update import REPOSITORY, current_git_head, detect_variant +from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant from .utils import ( DEFAULT_OUTTMPL, IDENTITY, @@ -239,9 +239,9 @@ class YoutubeDL: 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home' - 'temp' and the keys of OUTTMPL_TYPES (in utils.py) + 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py) outtmpl: Dictionary of templates for output names. Allowed keys - are 'default' and the keys of OUTTMPL_TYPES (in utils.py). + are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py). For compatibility with youtube-dl, a single string can also be used outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names @@ -422,7 +422,7 @@ class YoutubeDL: asked whether to download the video. - Raise utils.DownloadCancelled(msg) to abort remaining downloads when a video is rejected. - match_filter_func in utils.py is one example for this. + match_filter_func in utils/_utils.py is one example for this. color: A Dictionary with output stream names as keys and their respective color policy as values. Can also just be a single color policy, @@ -640,17 +640,9 @@ class YoutubeDL: for name, stream in self._out_files.items_ if name != 'console' }) - # The code is left like this to be reused for future deprecations - MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7) - current_version = sys.version_info[:2] - if current_version < MIN_RECOMMENDED: - msg = ('Support for Python version %d.%d has been deprecated. ' - 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.' - '\n You will no longer receive updates on this version') - if current_version < MIN_SUPPORTED: - msg = 'Python version %d.%d is no longer supported' - self.deprecated_feature( - f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) + system_deprecation = _get_system_deprecation() + if system_deprecation: + self.deprecated_feature(system_deprecation.replace('\n', '\n ')) if self.params.get('allow_unplayable_formats'): self.report_warning( diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index 832a9138d..5ad5c70ec 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -30,7 +30,7 @@ compat_os_name = os._name if os.name == 'java' else os.name if compat_os_name == 'nt': def compat_shlex_quote(s): import re - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') + return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""') else: from shlex import quote as compat_shlex_quote # noqa: F401 diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 3ca46d270..d62b7d048 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -15,7 +15,7 @@ def get_package_info(module): name=getattr(module, '_yt_dlp__identifier', module.__name__), version=str(next(filter(None, ( getattr(module, attr, None) - for attr in ('__version__', 'version_string', 'version') + for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version') )), None))) diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py index b27cc6133..9084b3c2b 100644 --- a/yt_dlp/compat/urllib/__init__.py +++ b/yt_dlp/compat/urllib/__init__.py @@ -1,7 +1,7 @@ # flake8: noqa: F405 from urllib import * # noqa: F403 -del request +del request # noqa: F821 from . import request # noqa: F401 from ..compat_utils import passthrough_module diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py index 6e7d29c5c..b56e4f5cc 100644 --- a/yt_dlp/dependencies/__init__.py +++ b/yt_dlp/dependencies/__init__.py @@ -43,6 +43,8 @@ except Exception as _err: try: import sqlite3 + # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152 + sqlite3._yt_dlp__version = sqlite3.sqlite_version except ImportError: # although sqlite3 is part of the standard library, it is possible to compile python without # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 802a9804d..d42d719d3 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -137,10 +137,6 @@ from .arte import ( ArteTVCategoryIE, ) from .arnes import ArnesIE -from .asiancrush import ( - AsianCrushIE, - AsianCrushPlaylistIE, -) from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE from .atttechchannel import ATTTechChannelIE @@ -275,6 +271,10 @@ from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) +from .brilliantpala import ( + BrilliantpalaElearnIE, + BrilliantpalaClassesIE, +) from .businessinsider import BusinessInsiderIE from .bundesliga import BundesligaIE from .buzzfeed import BuzzFeedIE @@ -296,9 +296,11 @@ from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .camwithher import CamWithHerIE +from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .caracoltv import CaracolTvPlayIE from .carambatv import ( CarambaTVIE, CarambaTVPageIE, @@ -356,6 +358,10 @@ from .chirbit import ( from .cinchcast import CinchcastIE from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE +from .cineverse import ( + CineverseIE, + CineverseDetailsIE, +) from .ciscolive import ( CiscoLiveSessionIE, CiscoLiveSearchIE, @@ -565,8 +571,10 @@ from .epicon import ( EpiconIE, EpiconSeriesIE, ) +from .eplus import EplusIbIE from .epoch import EpochIE from .eporner import EpornerIE +from .erocast import ErocastIE from .eroprofile import ( EroProfileIE, EroProfileAlbumIE, @@ -944,6 +952,7 @@ from .lastfm import ( from .lbry import ( LBRYIE, LBRYChannelIE, + LBRYPlaylistIE, ) from .lci import LCIIE from .lcp import ( @@ -1124,6 +1133,7 @@ from .mofosex import ( ) from .mojvideo import MojvideoIE from .mojevideo import MojevideoIE +from .monstercat import MonstercatIE from .morningstar import MorningstarIE from .motherless import ( MotherlessIE, @@ -1296,6 +1306,11 @@ from .ninecninemedia import ( NineCNineMediaIE, CPTwentyFourIE, ) +from .niconicochannelplus import ( + NiconicoChannelPlusIE, + NiconicoChannelPlusChannelVideosIE, + NiconicoChannelPlusChannelLivesIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE @@ -1449,6 +1464,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .piapro import PiaproIE +from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, @@ -1599,6 +1615,7 @@ from .rbmaradio import RBMARadioIE from .rbgtum import ( RbgTumIE, RbgTumCourseIE, + RbgTumNewCourseIE, ) from .rcs import ( RCSIE, diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index f56133eb3..9d527246a 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -180,20 +180,103 @@ class ABCIViewIE(InfoExtractor): _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P[^/?#]+)' _GEO_COUNTRIES = ['AU'] - # ABC iview programs are normally available for 14 days only. _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00', + 'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed', + 'info_dict': { + 'id': 'CO1211V001S00', + 'ext': 'mp4', + 'title': 'Series 1 Ep 1 Wood For The Trees', + 'series': 'Utopia', + 'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00', + 'upload_date': '20230726', + 'uploader_id': 'abc1', + 'series_id': 'CO1211V', + 'episode_id': 'CO1211V001S00', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Wood For The Trees', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg', + 'timestamp': 1690403700, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode name', 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'md5': '67715ce3c78426b11ba167d875ac6abf', 'info_dict': { 'id': 'LE1927H001S00', 'ext': 'mp4', - 'title': "Series 11 Ep 1", - 'series': "Gruen", + 'title': 'Series 11 Ep 1', + 'series': 'Gruen', 'description': 'md5:52cc744ad35045baf6aded2ce7287f67', 'upload_date': '20190925', 'uploader_id': 'abc1', + 'series_id': 'LE1927H', + 'episode_id': 'LE1927H001S00', + 'season_number': 11, + 'season': 'Season 11', + 'episode_number': 1, + 'episode': 'Episode 1', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg', 'timestamp': 1569445289, }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode number', + 'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00', + 'md5': '77cb7d8434440e3b28fbebe331c2456a', + 'info_dict': { + 'id': 'NC2203H039S00', + 'ext': 'mp4', + 'title': 'Series 2022 Locking Up Kids', + 'series': 'Four Corners', + 'description': 'md5:54829ca108846d1a70e1fcce2853e720', + 'upload_date': '20221114', + 'uploader_id': 'abc1', + 'series_id': 'NC2203H', + 'episode_id': 'NC2203H039S00', + 'season_number': 2022, + 'season': 'Season 2022', + 'episode_number': None, + 'episode': 'Locking Up Kids', + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg', + 'timestamp': 1668460497, + + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': { + 'skip_download': True, + }, + }, { + 'note': 'No episode name or number', + 'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00', + 'md5': '2e17dec06b13cc81dc119d2565289396', + 'info_dict': { + 'id': 'RF2004Q043S00', + 'ext': 'mp4', + 'title': 'Series 2021', + 'series': 'Landline', + 'description': 'md5:c9f30d9c0c914a7fd23842f6240be014', + 'upload_date': '20211205', + 'uploader_id': 'abc1', + 'series_id': 'RF2004Q', + 'episode_id': 'RF2004Q043S00', + 'season_number': 2021, + 'season': 'Season 2021', + 'episode_number': None, + 'episode': None, + 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg', + 'timestamp': 1638710705, + + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], 'params': { 'skip_download': True, }, @@ -255,6 +338,8 @@ class ABCIViewIE(InfoExtractor): 'episode_number': int_or_none(self._search_regex( r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), 'episode_id': house_number, + 'episode': self._search_regex( + r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None, 'uploader_id': video_params.get('channel'), 'formats': formats, 'subtitles': subtitles, diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index e3cc5afb0..a19cd2a3a 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -169,7 +169,7 @@ class ArteTVIE(ArteTVBaseIE): ))) short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?') - if stream['protocol'].startswith('HLS'): + if 'HLS' in stream['protocol']: fmts, subs = self._extract_m3u8_formats_and_subtitles( stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False) for fmt in fmts: diff --git a/yt_dlp/extractor/asiancrush.py b/yt_dlp/extractor/asiancrush.py deleted file mode 100644 index 23f310edb..000000000 --- a/yt_dlp/extractor/asiancrush.py +++ /dev/null @@ -1,196 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from .kaltura import KalturaIE -from ..utils import ( - extract_attributes, - int_or_none, - OnDemandPagedList, - parse_age_limit, - strip_or_none, - try_get, -) - - -class AsianCrushBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?(?P(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))' - _KALTURA_KEYS = [ - 'video_url', 'progressive_url', 'download_url', 'thumbnail_url', - 'widescreen_thumbnail_url', 'screencap_widescreen', - ] - _API_SUFFIX = {'retrocrush.tv': '-ott'} - - def _call_api(self, host, endpoint, video_id, query, resource): - return self._download_json( - 'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id, - 'Downloading %s JSON metadata' % resource, query=query, - headers=self.geo_verification_headers())['objects'] - - def _download_object_data(self, host, object_id, resource): - return self._call_api( - host, 'search', object_id, {'id': object_id}, resource)[0] - - def _get_object_description(self, obj): - return strip_or_none(obj.get('long_description') or obj.get('short_description')) - - def _parse_video_data(self, video): - title = video['name'] - - entry_id, partner_id = [None] * 2 - for k in self._KALTURA_KEYS: - k_url = video.get(k) - if k_url: - mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url) - if mobj: - partner_id, entry_id = mobj.groups() - break - - meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or [] - categories = list(filter(None, [c.get('name') for c in meta_categories])) - - show_info = video.get('show_info') or {} - - return { - '_type': 'url_transparent', - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), - 'ie_key': KalturaIE.ie_key(), - 'id': entry_id, - 'title': title, - 'description': self._get_object_description(video), - 'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')), - 'categories': categories, - 'series': show_info.get('show_name'), - 'season_number': int_or_none(show_info.get('season_num')), - 'season_id': show_info.get('season_id'), - 'episode_number': int_or_none(show_info.get('episode_num')), - } - - -class AsianCrushIE(AsianCrushBaseIE): - _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE - _TESTS = [{ - 'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt', - 'md5': 'c3b740e48d0ba002a42c0b72857beae6', - 'info_dict': { - 'id': '1_y4tmjm5r', - 'ext': 'mp4', - 'title': 'Women Who Flirt', - 'description': 'md5:b65c7e0ae03a85585476a62a186f924c', - 'timestamp': 1496936429, - 'upload_date': '20170608', - 'uploader_id': 'craig@crifkin.com', - 'age_limit': 13, - 'categories': 'count:5', - 'duration': 5812, - }, - }, { - 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', - 'only_matching': True, - }, { - 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/', - 'only_matching': True, - }, { - 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/', - 'only_matching': True, - }, { - 'url': 'https://www.midnightpulp.com/video/010400v/drifters/', - 'only_matching': True, - }, { - 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/', - 'only_matching': True, - }, { - 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/', - 'only_matching': True, - }, { - 'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears', - 'only_matching': True, - }] - - def _real_extract(self, url): - host, video_id = self._match_valid_url(url).groups() - - if host == 'cocoro.tv': - webpage = self._download_webpage(url, video_id) - embed_vars = self._parse_json(self._search_regex( - r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', - default='{}'), video_id, fatal=False) or {} - video_id = embed_vars.get('entry_id') or video_id - - video = self._download_object_data(host, video_id, 'video') - return self._parse_video_data(video) - - -class AsianCrushPlaylistIE(AsianCrushBaseIE): - _VALID_URL = r'%s/series/0+(?P\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE - _TESTS = [{ - 'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai', - 'info_dict': { - 'id': '6447', - 'title': 'Fruity Samurai', - 'description': 'md5:7535174487e4a202d3872a7fc8f2f154', - }, - 'playlist_count': 13, - }, { - 'url': 'https://www.yuyutv.com/series/013920s/peep-show/', - 'only_matching': True, - }, { - 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/', - 'only_matching': True, - }, { - 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/', - 'only_matching': True, - }, { - 'url': 'https://www.retrocrush.tv/series/012355s/true-tears', - 'only_matching': True, - }] - _PAGE_SIZE = 1000000000 - - def _fetch_page(self, domain, parent_id, page): - videos = self._call_api( - domain, 'getreferencedobjects', parent_id, { - 'max': self._PAGE_SIZE, - 'object_type': 'video', - 'parent_id': parent_id, - 'start': page * self._PAGE_SIZE, - }, 'page %d' % (page + 1)) - for video in videos: - yield self._parse_video_data(video) - - def _real_extract(self, url): - host, playlist_id = self._match_valid_url(url).groups() - - if host == 'cocoro.tv': - webpage = self._download_webpage(url, playlist_id) - - entries = [] - - for mobj in re.finditer( - r']+href=(["\'])(?P%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL, - webpage): - attrs = extract_attributes(mobj.group(0)) - if attrs.get('class') == 'clearfix': - entries.append(self.url_result( - mobj.group('url'), ie=AsianCrushIE.ie_key())) - - title = self._html_search_regex( - r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, - 'title', default=None) or self._og_search_title( - webpage, default=None) or self._html_search_meta( - 'twitter:title', webpage, 'title', - default=None) or self._html_extract_title(webpage) - if title: - title = re.sub(r'\s*\|\s*.+?$', '', title) - - description = self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'twitter:description', webpage, 'description', fatal=False) - else: - show = self._download_object_data(host, playlist_id, 'show') - title = show.get('name') - description = self._get_object_description(show) - entries = OnDemandPagedList( - functools.partial(self._fetch_page, host, playlist_id), - self._PAGE_SIZE) - - return self.playlist_result(entries, playlist_id, title, description) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 5e7042dbb..9119f396b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -49,14 +49,14 @@ class BilibiliBaseIE(InfoExtractor): for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality'])) } - audios = traverse_obj(play_info, ('dash', 'audio', ...)) + audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict})) flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio')) if flac_audio: audios.append(flac_audio) formats = [{ 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'), 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')), - 'acodec': audio.get('codecs'), + 'acodec': traverse_obj(audio, ('codecs', {str.lower})), 'vcodec': 'none', 'tbr': float_or_none(audio.get('bandwidth'), scale=1000), 'filesize': int_or_none(audio.get('size')), @@ -71,6 +71,7 @@ class BilibiliBaseIE(InfoExtractor): 'height': int_or_none(video.get('height')), 'vcodec': video.get('codecs'), 'acodec': 'none' if audios else None, + 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))), 'tbr': float_or_none(video.get('bandwidth'), scale=1000), 'filesize': int_or_none(video.get('size')), 'quality': int_or_none(video.get('id')), diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py new file mode 100644 index 000000000..6fd5b8148 --- /dev/null +++ b/yt_dlp/extractor/brilliantpala.py @@ -0,0 +1,127 @@ +import hashlib + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, + urlencode_postdata, +) + + +class BrilliantpalaBaseIE(InfoExtractor): + _NETRC_MACHINE = 'brilliantpala' + _DOMAIN = '{subdomain}.brilliantpala.org' + + def _initialize_pre_login(self): + self._HOMEPAGE = f'https://{self._DOMAIN}' + self._LOGIN_API = f'{self._HOMEPAGE}/login/' + self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/' + self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/' + self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/' + + def _get_logged_in_username(self, url, video_id): + webpage, urlh = self._download_webpage_handle(url, video_id) + if self._LOGIN_API == urlh.url: + self.raise_login_required() + return self._html_search_regex( + r'"username"\s*:\s*"(?P[^"]+)"', webpage, 'stream page info', 'username') + + def _perform_login(self, username, password): + login_form = self._hidden_inputs(self._download_webpage( + self._LOGIN_API, None, 'Downloading login page')) + login_form.update({ + 'username': username, + 'password': password, + }) + self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken']) + + logged_page = self._download_webpage( + self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API}, + data=urlencode_postdata(login_form)) + + if self._html_search_regex( + r'(Your username / email and password)', logged_page, 'auth fail', default=None): + raise ExtractorError('wrong username or password', expected=True) + + # the maximum number of logins is one + if self._html_search_regex( + r'(Logout Other Devices)', logged_page, 'logout devices button', default=None): + logout_device_form = self._hidden_inputs(logged_page) + self._download_webpage( + self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API}, + note='Logging out other devices', data=urlencode_postdata(logout_device_form)) + + def _real_extract(self, url): + course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id') + video_id = f'{course_id}-{content_id}' + + username = self._get_logged_in_username(url, video_id) + + content_json = self._download_json( + self._CONTENT_API.format(content_id=content_id), video_id, + note='Fetching content info', errnote='Unable to fetch content info') + + entries = [] + for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])): + formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False) + if not formats: + continue + entries.append({ + 'id': str(stream['id']), + 'title': content_json.get('title'), + 'formats': formats, + 'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)}, + 'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()}, + 'thumbnail': content_json.get('cover_image'), + }) + + return self.playlist_result( + entries, playlist_id=video_id, playlist_title=content_json.get('title')) + + +class BrilliantpalaElearnIE(BrilliantpalaBaseIE): + IE_NAME = 'Brilliantpala:Elearn' + IE_DESC = 'VoD on elearn.brilliantpala.org' + _VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P\d+)/contents/(?P\d+)/?' + _TESTS = [{ + 'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/', + 'only_matching': True, + }, { + 'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/', + 'info_dict': { + 'id': '23577', + 'ext': 'mp4', + 'title': 'Physical World, Units and Measurements - 1', + 'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + }, + }] + + _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn') + + +class BrilliantpalaClassesIE(BrilliantpalaBaseIE): + IE_NAME = 'Brilliantpala:Classes' + IE_DESC = 'VoD on classes.brilliantpala.org' + _VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P\d+)/contents/(?P\d+)/?' + _TESTS = [{ + 'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/', + 'only_matching': True, + }, { + 'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/', + 'info_dict': { + 'id': '9128', + 'ext': 'mp4', + 'title': 'Motion in a Straight Line - Class 1', + 'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + }, + }] + + _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes') diff --git a/yt_dlp/extractor/canal1.py b/yt_dlp/extractor/canal1.py new file mode 100644 index 000000000..587a11ab8 --- /dev/null +++ b/yt_dlp/extractor/canal1.py @@ -0,0 +1,39 @@ +from .common import InfoExtractor + + +class Canal1IE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P[\w-]+)' + + _TESTS = [{ + 'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/', + 'info_dict': { + 'id': '63b39f6b354977084b85ab54', + 'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco', + 'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó', + 'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013', + 'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54', + 'ext': 'mp4', + }, + }, { + 'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/', + 'info_dict': { + 'id': '63b39e93f5fd223aa32250fb', + 'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter', + 'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter', + 'description': 'md5:d9f691f131a21ce6767ca6c05d17d791', + 'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb', + 'ext': 'mp4', + }, + }, { + # Geo-restricted to Colombia + 'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + return self.url_result( + self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'), + display_id=display_id, url_transparent=True) diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py new file mode 100644 index 000000000..79f7752fe --- /dev/null +++ b/yt_dlp/extractor/caracoltv.py @@ -0,0 +1,136 @@ +import base64 +import json +import uuid + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + traverse_obj, + urljoin, +) + + +class CaracolTvPlayIE(InfoExtractor): + _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P[^/?#]+)' + _NETRC_MACHINE = 'caracoltv-play' + + _TESTS = [{ + 'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==', + 'info_dict': { + 'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==', + 'title': 'La teoría del promedio', + 'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3', + }, + 'playlist_count': 6, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0', + 'info_dict': { + 'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==', + 'title': 'Ella', + 'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8', + }, + 'playlist_count': 10, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0', + 'info_dict': { + 'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==', + 'title': 'La vuelta al mundo en 80 risas 2022', + 'description': 'md5:e97aac36106e5c37ebf947b3350106a4', + }, + 'playlist_count': 17, + }, { + 'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1', + 'only_matching': True, + }] + + _USER_TOKEN = None + + def _extract_app_token(self, webpage): + config_js_path = self._search_regex( + r']+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False) + + mediation_config = {} if not config_js_path else self._search_json( + r'mediation\s*:', self._download_webpage( + urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'), + 'mediation_config', None, transform_source=js_to_json, fatal=False) + + key = traverse_obj( + mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50' + secret = traverse_obj( + mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0' + + return base64.b64encode(f'{key}:{secret}'.encode()).decode() + + def _perform_login(self, email, password): + webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False) + app_token = self._extract_app_token(webpage) + + bearer_token = self._download_json( + 'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token', + headers={'Authorization': f'Basic {app_token}'})['token'] + + self._USER_TOKEN = self._download_json( + 'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={ + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {bearer_token}', + }, data=json.dumps({ + 'device_data': { + 'device_id': str(uuid.uuid4()), + 'device_token': '', + 'device_type': 'web' + }, + 'login_data': { + 'enabled': True, + 'email': email, + 'password': password, + } + }).encode())['user_token'] + + def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None): + formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4') + + return { + 'id': video_data['id'], + 'title': video_data.get('name'), + 'description': video_data.get('description'), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': traverse_obj( + video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})), + 'series_id': series_id, + 'season_id': season_id, + 'season_number': int_or_none(season_number), + 'episode_number': int_or_none(video_data.get('item_order')), + 'is_live': video_data.get('entry_type') == 3, + } + + def _extract_series_seasons(self, seasons, series_id): + for season in seasons: + api_response = self._download_json( + 'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']}, + headers={'Authorization': f'Bearer {self._USER_TOKEN}'}) + + season_number = season.get('order') + for episode in api_response['items']: + yield self._extract_video(episode, series_id, season['id'], season_number) + + def _real_extract(self, url): + series_id = self._match_id(url) + + if self._USER_TOKEN is None: + self._perform_login('guest@inmobly.com', 'Test@gus1') + + api_response = self._download_json( + 'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id}, + headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0] + + if not api_response.get('seasons'): + return self._extract_video(api_response) + + return self.playlist_result( + self._extract_series_seasons(api_response['seasons'], series_id), + series_id, **traverse_obj(api_response, { + 'title': 'name', + 'description': 'description', + })) diff --git a/yt_dlp/extractor/cineverse.py b/yt_dlp/extractor/cineverse.py new file mode 100644 index 000000000..c9fa789b7 --- /dev/null +++ b/yt_dlp/extractor/cineverse.py @@ -0,0 +1,136 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + filter_dict, + int_or_none, + parse_age_limit, + smuggle_url, + traverse_obj, + unsmuggle_url, + url_or_none, +) + + +class CineverseBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https://www\.(?P%s)' % '|'.join(map(re.escape, ( + 'cineverse.com', + 'asiancrush.com', + 'dovechannel.com', + 'screambox.com', + 'midnightpulp.com', + 'fandor.com', + 'retrocrush.tv', + ))) + + +class CineverseIE(CineverseBaseIE): + _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P[A-Z0-9]+)' + _TESTS = [{ + 'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt', + 'skip': 'geo-blocked', + 'info_dict': { + 'title': 'Women Who Flirt', + 'ext': 'mp4', + 'id': 'DMR00018919', + 'modified_timestamp': 1678744575289, + 'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'], + 'duration': 5811.597, + 'description': 'md5:892fd62a05611d394141e8394ace0bc6', + 'age_limit': 13, + } + }, { + 'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie', + 'skip': 'geo-blocked', + 'info_dict': { + 'title': 'Archenemy! Crystal Bowie', + 'ext': 'mp4', + 'id': '1000000023016', + 'episode_number': 3, + 'season_number': 1, + 'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'], + 'age_limit': 0, + 'episode': 'Episode 3', + 'season': 'Season 1', + 'duration': 1485.067, + 'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.', + 'series': 'Space Adventure COBRA (Original Japanese)', + } + }] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, default={}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + }) + video_id = self._match_id(url) + html = self._download_webpage(url, video_id) + idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails'] + + if idetails.get('err_code') == 1200: + self.raise_geo_restricted( + 'This video is not available from your location due to geo restriction. ' + 'You may be able to bypass it by using the /details/ page instead of the /watch/ page', + countries=smuggled_data.get('geo_countries')) + + return { + 'subtitles': filter_dict({ + 'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None, + }), + 'formats': self._extract_m3u8_formats(idetails['url'], video_id), + **traverse_obj(idetails, { + 'title': 'title', + 'id': ('details', 'item_id'), + 'description': ('details', 'description'), + 'duration': ('duration', {lambda x: x / 1000}), + 'cast': ('details', 'cast', {lambda x: x.split(', ')}), + 'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}), + 'season_number': ('details', 'season', {int_or_none}), + 'episode_number': ('details', 'episode', {int_or_none}), + 'age_limit': ('details', 'rating_code', {parse_age_limit}), + 'series': ('details', 'series_details', 'title'), + }), + } + + +class CineverseDetailsIE(CineverseBaseIE): + _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P[A-Z0-9]+)' + _TESTS = [{ + 'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)', + 'playlist_mincount': 30, + 'info_dict': { + 'title': 'Space Adventure COBRA (Original Japanese)', + 'id': '1000000023012', + } + }, { + 'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel', + 'info_dict': { + 'id': 'NNVG4938', + 'ext': 'mp4', + 'title': 'Hansel and Gretel', + 'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d', + 'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'], + 'duration': 7030.732, + }, + }] + + def _real_extract(self, url): + host, series_id = self._match_valid_url(url).group('host', 'id') + html = self._download_webpage(url, series_id) + pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps'] + + geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')})) + geoblocked = traverse_obj(pageprops, ( + 'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.' + + def item_result(item): + item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}' + if geoblocked: + item_url = smuggle_url(item_url, {'geo_countries': geo_countries}) + return self.url_result(item_url, CineverseIE) + + season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title'])) + if season: + return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id, + playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title'))) + return item_result(pageprops['itemDetailsData']) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 7deab995c..c94b4abdc 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1687,7 +1687,7 @@ class InfoExtractor: def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) - FUNCTION_RE = r'\(function\((?P.*?)\){return\s+(?P{.*?})\s*;?\s*}\((?P.*?)\)' + FUNCTION_RE = r'\(function\((?P.*?)\){(?:.*?)return\s+(?P{.*?})\s*;?\s*}\((?P.*?)\)' js, arg_keys, arg_vals = self._search_regex( (rf'', rf'{rectx}\(.*?{FUNCTION_RE}'), webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index fa40844df..ee8893d5a 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,31 +1,72 @@ import time import hashlib -import re import urllib +import uuid from .common import InfoExtractor +from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, + UserNotLive, + determine_ext, + int_or_none, + js_to_json, + parse_resolution, + str_or_none, + traverse_obj, unescapeHTML, - unified_strdate, + url_or_none, + urlencode_postdata, urljoin, ) -class DouyuTVIE(InfoExtractor): - IE_DESC = '斗鱼' +class DouyuBaseIE(InfoExtractor): + def _download_cryptojs_md5(self, video_id): + for url in [ + 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js', + 'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js', + ]: + js_code = self._download_webpage( + url, video_id, note='Downloading signing dependency', fatal=False) + if js_code: + self.cache.store('douyu', 'crypto-js-md5', js_code) + return js_code + raise ExtractorError('Unable to download JS dependency (crypto-js/md5)') + + def _get_cryptojs_md5(self, video_id): + return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id) + + def _calc_sign(self, sign_func, video_id, a): + b = uuid.uuid4().hex + c = round(time.time()) + js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))' + phantom = PhantomJSwrapper(self) + result = phantom.execute(js_script, video_id, + note='Executing JS signing script').strip() + return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()} + + def _search_js_sign_func(self, webpage, fatal=True): + # The greedy look-behind ensures last possible script tag is matched + return self._search_regex( + r'(?:]*>(.*?ub98484234.*?)', webpage, 'JS sign func', fatal=fatal) + + +class DouyuTVIE(DouyuBaseIE): + IE_DESC = '斗鱼直播' _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P[A-Za-z0-9]+)' _TESTS = [{ - 'url': 'http://www.douyutv.com/iseven', + 'url': 'https://www.douyu.com/pigff', 'info_dict': { - 'id': '17732', - 'display_id': 'iseven', - 'ext': 'flv', - 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.png', - 'uploader': '7师傅', + 'id': '24422', + 'display_id': 'pigff', + 'ext': 'mp4', + 'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群', + 'thumbnail': str, + 'uploader': 'pigff', 'is_live': True, + 'live_status': 'is_live', }, 'params': { 'skip_download': True, @@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor): 'only_matching': True, }] + def _get_sign_func(self, room_id, video_id): + return self._download_json( + f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id, + note='Getting signing script')['data'][f'room{room_id}'] + + def _extract_stream_formats(self, stream_formats): + formats = [] + for stream_info in traverse_obj(stream_formats, (..., 'data')): + stream_url = urljoin( + traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live')) + if stream_url: + rate_id = traverse_obj(stream_info, ('rate', {int_or_none})) + rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False) + ext = determine_ext(stream_url) + formats.append({ + 'url': stream_url, + 'format_id': str_or_none(rate_id), + 'ext': 'mp4' if ext == 'm3u8' else ext, + 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', + 'quality': rate_id % -10000 if rate_id is not None else None, + **traverse_obj(rate_info, { + 'format': ('name', {str_or_none}), + 'tbr': ('bit', {int_or_none}), + }), + }) + return formats + def _real_extract(self, url): video_id = self._match_id(url) - if video_id.isdigit(): - room_id = video_id - else: - page = self._download_webpage(url, video_id) - room_id = self._html_search_regex( - r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') + webpage = self._download_webpage(url, video_id) + room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id') + + if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1': + raise UserNotLive('The channel is auto-playing VODs', video_id=video_id) + if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2': + raise UserNotLive(video_id=video_id) # Grab metadata from API params = { @@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor): 'time': int(time.time()), } params['auth'] = hashlib.md5( - f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() - room = self._download_json( + f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() + room = traverse_obj(self._download_json( f'http://www.douyutv.com/api/v1/room/{room_id}', video_id, - note='Downloading room info', query=params)['data'] + note='Downloading room info', query=params, fatal=False), 'data') # 1 = live, 2 = offline - if room.get('show_status') == '2': - raise ExtractorError('Live stream is offline', expected=True) + if traverse_obj(room, 'show_status') == '2': + raise UserNotLive(video_id=video_id) - video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL')) - formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id) + js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id) + form_data = { + 'rate': 0, + **self._calc_sign(js_sign_func, video_id, room_id), + } + stream_formats = [self._download_json( + f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', + video_id, note="Downloading livestream format", + data=urlencode_postdata(form_data))] - title = unescapeHTML(room['room_name']) - description = room.get('show_details') - thumbnail = room.get('room_src') - uploader = room.get('nickname') + for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')): + if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')): + form_data['rate'] = rate_id + stream_formats.append(self._download_json( + f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', + video_id, note=f'Downloading livestream format {rate_id}', + data=urlencode_postdata(form_data))) return { 'id': room_id, - 'display_id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, + 'formats': self._extract_stream_formats(stream_formats), 'is_live': True, - 'subtitles': subs, - 'formats': formats, + **traverse_obj(room, { + 'display_id': ('url', {str}, {lambda i: i[1:]}), + 'title': ('room_name', {unescapeHTML}), + 'description': ('show_details', {str}), + 'uploader': ('nickname', {str}), + 'thumbnail': ('room_src', {url_or_none}), + }) } -class DouyuShowIE(InfoExtractor): +class DouyuShowIE(DouyuBaseIE): _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P[0-9a-zA-Z]+)' _TESTS = [{ - 'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw', - 'md5': '0c2cfd068ee2afe657801269b2d86214', + 'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY', 'info_dict': { - 'id': 'rjNBdvnVXNzvE2yw', + 'id': 'mPyq7oVNe5Yv1gLY', 'ext': 'mp4', - 'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场', - 'duration': 7150.08, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': '陈一发儿', - 'uploader_id': 'XrZwYelr5wbK', - 'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK', - 'upload_date': '20170402', + 'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃', + 'duration': 633, + 'thumbnail': str, + 'uploader': '美食作家王刚V', + 'uploader_id': 'OVAO4NVx1m7Q', + 'timestamp': 1661850002, + 'upload_date': '20220830', + 'view_count': int, + 'tags': ['美食', '美食综合'], }, }, { 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw', 'only_matching': True, }] + _FORMATS = { + 'super': '原画', + 'high': '超清', + 'normal': '高清', + } + + _QUALITIES = { + 'super': -1, + 'high': -2, + 'normal': -3, + } + + _RESOLUTIONS = { + 'super': '1920x1080', + 'high': '1280x720', + 'normal': '852x480', + } + def _real_extract(self, url): url = url.replace('vmobile.', 'v.') video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - room_info = self._parse_json(self._search_regex( - r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id) + video_info = self._search_json( + r'') + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats( + data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'), + 'age_limit': 18, + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'release_timestamp': ('created_at', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'id', {str_or_none}), + 'uploader_url': ('user', 'permalink_url', {url_or_none}), + 'thumbnail': ('artwork_url', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'view_count': ('plays', {int_or_none}), + 'comment_count': ('comment_count', {int_or_none}), + 'webpage_url': ('permalink_url', {url_or_none}), + }), + } diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py index 86967b631..b96f2e4cb 100644 --- a/yt_dlp/extractor/expressen.py +++ b/yt_dlp/extractor/expressen.py @@ -11,8 +11,8 @@ class ExpressenIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?:www\.)?(?:expressen|di)\.se/ - (?:(?:tvspelare/video|videoplayer/embed)/)? - tv/(?:[^/]+/)* + (?:(?:tvspelare/video|video-?player/embed)/)? + (?:tv|nyheter)/(?:[^/?#]+/)* (?P[^/?#&]+) ''' _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1'] @@ -42,6 +42,12 @@ class ExpressenIE(InfoExtractor): }, { 'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di', 'only_matching': True, + }, { + 'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn', + 'only_matching': True, + }, { + 'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index 898390583..ef14b57d0 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -60,7 +60,7 @@ class GofileIE(InfoExtractor): account_data = self._download_json( 'https://api.gofile.io/createAccount', None, note='Getting a new guest account') self._TOKEN = account_data['data']['token'] - self._set_cookie('gofile.io', 'accountToken', self._TOKEN) + self._set_cookie('.gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): query_params = { diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index 64875f8ce..1c4f105e9 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE): 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', 'duration': 298, 'tags': 'count:13', - 'display_id': '112203', - 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', - 'duration': 298, - 'tags': 'count:13', }, 'expected_warnings': ['HTTP Error 400: Bad Request'], }, { diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 6dec1510d..f7aa579b3 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -134,10 +134,17 @@ class IPrimaIE(InfoExtractor): ), webpage, 'real id', group='id', default=None) if not video_id: - nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data') + nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data', fatal=False) video_id = traverse_obj( nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False) + if not video_id: + nuxt_data = self._search_json( + r']+\bid=["\']__NUXT_DATA__["\'][^>]*>', + webpage, 'nuxt data', None, end_pattern=r'', contains_pattern=r'\[(?s:.+)\]') + + video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d+', v), get_all=False) + if not video_id: self.raise_no_formats('Unable to extract video ID from webpage') diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index fa602ba88..3368ab1d9 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -499,9 +499,10 @@ class IqIE(InfoExtractor): 'tm': tm, 'qdy': 'a', 'qds': 0, - 'k_ft1': 141287244169348, - 'k_ft4': 34359746564, - 'k_ft5': 1, + 'k_ft1': '143486267424900', + 'k_ft4': '1572868', + 'k_ft7': '4', + 'k_ft5': '1', 'bop': JSON.stringify({ 'version': '10.0', 'dfp': dfp @@ -529,14 +530,22 @@ class IqIE(InfoExtractor): webpack_js_url = self._proto_relative_url(self._search_regex( r'' _ANVATO_PREFIX = 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + _CLIENT_DATA = { + 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g', + 'clientSecret': 'CZuvCL49d9OwfGsR', + 'deviceId': str(uuid.uuid4()), + 'deviceInfo': base64.b64encode(json.dumps({ + 'model': 'desktop', + 'version': 'Chrome', + 'osName': 'Windows', + 'osVersion': '10.0', + }, separators=(',', ':')).encode()).decode(), + 'networkType': 'other', + 'nflClaimGroupsToAdd': [], + 'nflClaimGroupsToRemove': [], + } + _ACCOUNT_INFO = {} + _API_KEY = None + + _TOKEN = None + _TOKEN_EXPIRY = 0 + + def _get_account_info(self, url, slug): + if not self._API_KEY: + webpage = self._download_webpage(url, slug, fatal=False) or '' + self._API_KEY = self._search_regex( + r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key', + fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' + + cookies = self._get_cookies('https://auth-id.nfl.com/') + login_token = traverse_obj(cookies, ( + (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) + if not login_token: + self.raise_login_required() + if 'ucid' not in cookies: + raise ExtractorError( + 'Required cookies for the auth-id.nfl.com domain were not found among passed cookies. ' + 'If using --cookies, these cookies must be exported along with .nfl.com cookies, ' + 'or else try using --cookies-from-browser instead', expected=True) + + account = self._download_json( + 'https://auth-id.nfl.com/accounts.getAccountInfo', slug, + note='Downloading account info', data=urlencode_postdata({ + 'include': 'profile,data', + 'lang': 'en', + 'APIKey': self._API_KEY, + 'sdk': 'js_latest', + 'login_token': login_token, + 'authMode': 'cookie', + 'pageURL': url, + 'sdkBuild': traverse_obj(cookies, ( + 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), + 'format': 'json', + }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + self._ACCOUNT_INFO = traverse_obj(account, { + 'signatureTimestamp': 'signatureTimestamp', + 'uid': 'UID', + 'uidSignature': 'UIDSignature', + }) + + if len(self._ACCOUNT_INFO) != 3: + raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) + + def _get_auth_token(self, url, slug): + if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): + return + + if not self._ACCOUNT_INFO: + self._get_account_info(url, slug) + + token = self._download_json( + 'https://api.nfl.com/identity/v3/token%s' % ( + '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), + slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', + data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) + + self._TOKEN = token['accessToken'] + self._TOKEN_EXPIRY = token['expiresIn'] + self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] + def _parse_video_config(self, video_config, display_id): video_config = self._parse_json(video_config, display_id) item = video_config['playlist'][0] @@ -168,7 +247,7 @@ class NFLArticleIE(NFLBaseIE): class NFLPlusReplayIE(NFLBaseIE): IE_NAME = 'nfl.com:plus:replay' - _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/[\w-]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/games/(?P[\w-]+)(?:/(?P\d+))?' _TESTS = [{ 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1/1572108', 'info_dict': { @@ -185,23 +264,92 @@ class NFLPlusReplayIE(NFLBaseIE): 'thumbnail': r're:^https?://.*\.jpg', }, 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-vikings-2022-post-1', + 'playlist_count': 4, + 'info_dict': { + 'id': 'giants-at-vikings-2022-post-1', + }, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4', + 'playlist_count': 2, + 'info_dict': { + 'id': 'giants-at-patriots-2011-pre-4', + }, + }, { + 'note': 'Subscription required', + 'url': 'https://www.nfl.com/plus/games/giants-at-patriots-2011-pre-4', + 'info_dict': { + 'id': '950701', + 'ext': 'mp4', + 'title': 'Giants @ Patriots', + 'description': 'Giants at Patriots on September 01, 2011', + 'uploader': 'NFL', + 'upload_date': '20210724', + 'timestamp': 1627085874, + 'duration': 1532, + 'categories': ['Game Highlights'], + 'tags': ['play-by-play'], + 'thumbnail': r're:^https?://.*\.jpg', + }, + 'params': { + 'skip_download': 'm3u8', + 'extractor_args': {'nflplusreplay': {'type': ['condensed_game']}}, + }, }] + _REPLAY_TYPES = { + 'full_game': 'Full Game', + 'full_game_spanish': 'Full Game - Spanish', + 'condensed_game': 'Condensed Game', + 'all_22': 'All-22', + } + def _real_extract(self, url): - video_id = self._match_id(url) - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + slug, video_id = self._match_valid_url(url).group('slug', 'id') + requested_types = self._configuration_arg('type', ['all']) + if 'all' in requested_types: + requested_types = list(self._REPLAY_TYPES.keys()) + requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) + + if not video_id: + self._get_auth_token(url, slug) + headers = {'Authorization': f'Bearer {self._TOKEN}'} + game_id = self._download_json( + f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, + 'Downloading game ID', query={'withExternalIds': 'true'}, headers=headers)['id'] + replays = self._download_json( + 'https://api.nfl.com/content/v1/videos/replays', slug, 'Downloading replays JSON', + query={'gameId': game_id}, headers=headers) + if len(requested_types) == 1: + video_id = traverse_obj(replays, ( + 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) + + if video_id: + return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + + def entries(): + for replay in traverse_obj( + replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types) + ): + video_id = replay['mcpPlaybackId'] + yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + + return self.playlist_result(entries(), slug) class NFLPlusEpisodeIE(NFLBaseIE): IE_NAME = 'nfl.com:plus:episode' _VALID_URL = r'https?://(?:www\.)?nfl.com/plus/episodes/(?P[\w-]+)' _TESTS = [{ - 'note': 'premium content', + 'note': 'Subscription required', 'url': 'https://www.nfl.com/plus/episodes/kurt-s-qb-insider-conference-championships', 'info_dict': { 'id': '1576832', 'ext': 'mp4', - 'title': 'Kurt\'s QB Insider: Conference Championships', + 'title': 'Conference Championships', 'description': 'md5:944f7fab56f7a37430bf8473f5473857', 'uploader': 'NFL', 'upload_date': '20230127', @@ -214,85 +362,9 @@ class NFLPlusEpisodeIE(NFLBaseIE): 'params': {'skip_download': 'm3u8'}, }] - _CLIENT_DATA = { - 'clientKey': '4cFUW6DmwJpzT9L7LrG3qRAcABG5s04g', - 'clientSecret': 'CZuvCL49d9OwfGsR', - 'deviceId': str(uuid.uuid4()), - 'deviceInfo': base64.b64encode(json.dumps({ - 'model': 'desktop', - 'version': 'Chrome', - 'osName': 'Windows', - 'osVersion': '10.0', - }, separators=(',', ':')).encode()).decode(), - 'networkType': 'other', - 'nflClaimGroupsToAdd': [], - 'nflClaimGroupsToRemove': [], - } - _ACCOUNT_INFO = {} - _API_KEY = None - - _TOKEN = None - _TOKEN_EXPIRY = 0 - - def _get_account_info(self, url, video_id): - cookies = self._get_cookies('https://www.nfl.com/') - login_token = traverse_obj(cookies, ( - (f'glt_{self._API_KEY}', f'gig_loginToken_{self._API_KEY}', - lambda k, _: k.startswith('glt_') or k.startswith('gig_loginToken_')), - {lambda x: x.value}), get_all=False) - if not login_token: - self.raise_login_required() - - account = self._download_json( - 'https://auth-id.nfl.com/accounts.getAccountInfo', video_id, - note='Downloading account info', data=urlencode_postdata({ - 'include': 'profile,data', - 'lang': 'en', - 'APIKey': self._API_KEY, - 'sdk': 'js_latest', - 'login_token': login_token, - 'authMode': 'cookie', - 'pageURL': url, - 'sdkBuild': traverse_obj(cookies, ( - 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='13642'), - 'format': 'json', - }), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - - self._ACCOUNT_INFO = traverse_obj(account, { - 'signatureTimestamp': 'signatureTimestamp', - 'uid': 'UID', - 'uidSignature': 'UIDSignature', - }) - - if len(self._ACCOUNT_INFO) != 3: - raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) - - def _get_auth_token(self, url, video_id): - if not self._ACCOUNT_INFO: - self._get_account_info(url, video_id) - - token = self._download_json( - 'https://api.nfl.com/identity/v3/token%s' % ( - '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), - video_id, headers={'Content-Type': 'application/json'}, note='Downloading access token', - data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) - - self._TOKEN = token['accessToken'] - self._TOKEN_EXPIRY = token['expiresIn'] - self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] - def _real_extract(self, url): slug = self._match_id(url) - - if not self._API_KEY: - webpage = self._download_webpage(url, slug, fatal=False) or '' - self._API_KEY = self._search_regex( - r'window\.gigyaApiKey=["\'](\w+)["\'];', webpage, 'API key', - default='3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f') - - if not self._TOKEN or self._TOKEN_EXPIRY <= int(time.time()): - self._get_auth_token(url, slug) - + self._get_auth_token(url, slug) video_id = self._download_json( f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ 'Authorization': f'Bearer {self._TOKEN}', diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index fbd6a18f6..bcbc2279f 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -28,6 +28,44 @@ class NhkBaseIE(InfoExtractor): m_id, lang, '/all' if is_video else ''), m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or [] + def _get_api_info(self, refresh=True): + if not refresh: + return self.cache.load('nhk', 'api_info') + + self.cache.store('nhk', 'api_info', {}) + movie_player_js = self._download_webpage( + 'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None, + note='Downloading stream API information') + api_info = { + 'url': self._search_regex( + r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API url'), + 'token': self._search_regex( + r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, None, 'stream API token'), + } + self.cache.store('nhk', 'api_info', api_info) + return api_info + + def _extract_formats_and_subtitles(self, vod_id): + for refresh in (False, True): + api_info = self._get_api_info(refresh) + if not api_info: + continue + + api_url = api_info.pop('url') + stream_url = traverse_obj( + self._download_json( + api_url, vod_id, 'Downloading stream url info', fatal=False, query={ + **api_info, + 'type': 'json', + 'optional_id': vod_id, + 'active_flg': 1, + }), + ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False) + if stream_url: + return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id) + + raise ExtractorError('Unable to extract stream url') + def _extract_episode_info(self, url, episode=None): fetch_episode = episode is None lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups() @@ -67,12 +105,14 @@ class NhkBaseIE(InfoExtractor): } if is_video: vod_id = episode['vod_id'] + formats, subs = self._extract_formats_and_subtitles(vod_id) + info.update({ - '_type': 'url_transparent', - 'ie_key': 'Piksel', - 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id, 'id': vod_id, + 'formats': formats, + 'subtitles': subs, }) + else: if fetch_episode: audio_path = episode['audio']['audio'] diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py new file mode 100644 index 000000000..89af3f7b5 --- /dev/null +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -0,0 +1,426 @@ +import functools +import json + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + OnDemandPagedList, + filter_dict, + int_or_none, + parse_qs, + str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class NiconicoChannelPlusBaseIE(InfoExtractor): + _WEBPAGE_BASE_URL = 'https://nicochannel.jp' + + def _call_api(self, path, item_id, *args, **kwargs): + return self._download_json( + f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs) + + def _find_fanclub_site_id(self, channel_name): + fanclub_list_json = self._call_api( + 'content_providers/channels', item_id=f'channels/{channel_name}', + note='Fetching channel list', errnote='Unable to fetch channel list', + )['data']['content_providers'] + fanclub_id = traverse_obj(fanclub_list_json, ( + lambda _, v: v['domain'] == f'{self._WEBPAGE_BASE_URL}/{channel_name}', 'id'), + get_all=False) + if not fanclub_id: + raise ExtractorError(f'Channel {channel_name} does not exist', expected=True) + return fanclub_id + + def _get_channel_base_info(self, fanclub_site_id): + return traverse_obj(self._call_api( + f'fanclub_sites/{fanclub_site_id}/page_base_info', item_id=f'fanclub_sites/{fanclub_site_id}', + note='Fetching channel base info', errnote='Unable to fetch channel base info', fatal=False, + ), ('data', 'fanclub_site', {dict})) or {} + + def _get_channel_user_info(self, fanclub_site_id): + return traverse_obj(self._call_api( + f'fanclub_sites/{fanclub_site_id}/user_info', item_id=f'fanclub_sites/{fanclub_site_id}', + note='Fetching channel user info', errnote='Unable to fetch channel user info', fatal=False, + data=json.dumps('null').encode('ascii'), + ), ('data', 'fanclub_site', {dict})) or {} + + +class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): + IE_NAME = 'NiconicoChannelPlus' + IE_DESC = 'ニコニコチャンネルプラス' + _VALID_URL = r'https?://nicochannel\.jp/(?P[\w.-]+)/(?:video|live)/(?Psm\w+)' + _TESTS = [{ + 'url': 'https://nicochannel.jp/kaorin/video/smsDd8EdFLcVZk9yyAhD6H7H', + 'info_dict': { + 'id': 'smsDd8EdFLcVZk9yyAhD6H7H', + 'title': '前田佳織里はニコ生がしたい!', + 'ext': 'mp4', + 'channel': '前田佳織里の世界攻略計画', + 'channel_id': 'kaorin', + 'channel_url': 'https://nicochannel.jp/kaorin', + 'live_status': 'not_live', + 'thumbnail': 'https://nicochannel.jp/public_html/contents/video_pages/74/thumbnail_path', + 'description': '2021年11月に放送された\n「前田佳織里はニコ生がしたい!」アーカイブになります。', + 'timestamp': 1641360276, + 'duration': 4097, + 'comment_count': int, + 'view_count': int, + 'tags': [], + 'upload_date': '20220105', + }, + 'params': { + 'skip_download': True, + }, + }, { + # age limited video; test purpose channel. + 'url': 'https://nicochannel.jp/testman/video/smDXbcrtyPNxLx9jc4BW69Ve', + 'info_dict': { + 'id': 'smDXbcrtyPNxLx9jc4BW69Ve', + 'title': 'test oshiro', + 'ext': 'mp4', + 'channel': '本番チャンネルプラステストマン', + 'channel_id': 'testman', + 'channel_url': 'https://nicochannel.jp/testman', + 'age_limit': 18, + 'live_status': 'was_live', + 'timestamp': 1666344616, + 'duration': 86465, + 'comment_count': int, + 'view_count': int, + 'tags': [], + 'upload_date': '20221021', + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + content_code, channel_id = self._match_valid_url(url).group('code', 'channel') + fanclub_site_id = self._find_fanclub_site_id(channel_id) + + data_json = self._call_api( + f'video_pages/{content_code}', item_id=content_code, headers={'fc_use_device': 'null'}, + note='Fetching video page info', errnote='Unable to fetch video page info', + )['data']['video_page'] + + live_status, session_id = self._get_live_status_and_session_id(content_code, data_json) + + release_timestamp_str = data_json.get('live_scheduled_start_at') + + formats = [] + + if live_status == 'is_upcoming': + if release_timestamp_str: + msg = f'This live event will begin at {release_timestamp_str} UTC' + else: + msg = 'This event has not started yet' + self.raise_no_formats(msg, expected=True, video_id=content_code) + else: + formats = self._extract_m3u8_formats( + # "authenticated_url" is a format string that contains "{session_id}". + m3u8_url=data_json['video_stream']['authenticated_url'].format(session_id=session_id), + video_id=content_code) + + return { + 'id': content_code, + 'formats': formats, + '_format_sort_fields': ('tbr', 'vcodec', 'acodec'), + 'channel': self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name'), + 'channel_id': channel_id, + 'channel_url': f'{self._WEBPAGE_BASE_URL}/{channel_id}', + 'age_limit': traverse_obj(self._get_channel_user_info(fanclub_site_id), ('content_provider', 'age_limit')), + 'live_status': live_status, + 'release_timestamp': unified_timestamp(release_timestamp_str), + **traverse_obj(data_json, { + 'title': ('title', {str}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + 'description': ('description', {str}), + 'timestamp': ('released_at', {unified_timestamp}), + 'duration': ('active_video_filename', 'length', {int_or_none}), + 'comment_count': ('video_aggregate_info', 'number_of_comments', {int_or_none}), + 'view_count': ('video_aggregate_info', 'total_views', {int_or_none}), + 'tags': ('video_tags', ..., 'tag', {str}), + }), + '__post_extractor': self.extract_comments( + content_code=content_code, + comment_group_id=traverse_obj(data_json, ('video_comment_setting', 'comment_group_id'))), + } + + def _get_comments(self, content_code, comment_group_id): + item_id = f'{content_code}/comments' + + if not comment_group_id: + return None + + comment_access_token = self._call_api( + f'video_pages/{content_code}/comments_user_token', item_id, + note='Getting comment token', errnote='Unable to get comment token', + )['data']['access_token'] + + comment_list = self._download_json( + 'https://comm-api.sheeta.com/messages.history', video_id=item_id, + note='Fetching comments', errnote='Unable to fetch comments', + headers={'Content-Type': 'application/json'}, + query={ + 'sort_direction': 'asc', + 'limit': int_or_none(self._configuration_arg('max_comments', [''])[0]) or 120, + }, + data=json.dumps({ + 'token': comment_access_token, + 'group_id': comment_group_id, + }).encode('ascii')) + + for comment in traverse_obj(comment_list, ...): + yield traverse_obj(comment, { + 'author': ('nickname', {str}), + 'author_id': ('sender_id', {str_or_none}), + 'id': ('id', {str_or_none}), + 'text': ('message', {str}), + 'timestamp': (('updated_at', 'sent_at', 'created_at'), {unified_timestamp}), + 'author_is_uploader': ('sender_id', {lambda x: x == '-1'}), + }, get_all=False) + + def _get_live_status_and_session_id(self, content_code, data_json): + video_type = data_json.get('type') + live_finished_at = data_json.get('live_finished_at') + + payload = {} + if video_type == 'vod': + if live_finished_at: + live_status = 'was_live' + else: + live_status = 'not_live' + elif video_type == 'live': + if not data_json.get('live_started_at'): + return 'is_upcoming', '' + + if not live_finished_at: + live_status = 'is_live' + else: + live_status = 'was_live' + payload = {'broadcast_type': 'dvr'} + + video_allow_dvr_flg = traverse_obj(data_json, ('video', 'allow_dvr_flg')) + video_convert_to_vod_flg = traverse_obj(data_json, ('video', 'convert_to_vod_flg')) + + self.write_debug(f'allow_dvr_flg = {video_allow_dvr_flg}, convert_to_vod_flg = {video_convert_to_vod_flg}.') + + if not (video_allow_dvr_flg and video_convert_to_vod_flg): + raise ExtractorError( + 'Live was ended, there is no video for download.', video_id=content_code, expected=True) + else: + raise ExtractorError(f'Unknown type: {video_type}', video_id=content_code, expected=False) + + self.write_debug(f'{content_code}: video_type={video_type}, live_status={live_status}') + + session_id = self._call_api( + f'video_pages/{content_code}/session_ids', item_id=f'{content_code}/session', + data=json.dumps(payload).encode('ascii'), headers={ + 'Content-Type': 'application/json', + 'fc_use_device': 'null', + 'origin': 'https://nicochannel.jp', + }, + note='Getting session id', errnote='Unable to get session id', + )['data']['session_id'] + + return live_status, session_id + + +class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE): + _PAGE_SIZE = 12 + + def _fetch_paged_channel_video_list(self, path, query, channel_name, item_id, page): + response = self._call_api( + path, item_id, query={ + **query, + 'page': (page + 1), + 'per_page': self._PAGE_SIZE, + }, + headers={'fc_use_device': 'null'}, + note=f'Getting channel info (page {page + 1})', + errnote=f'Unable to get channel info (page {page + 1})') + + for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')): + # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD + yield self.url_result( + f'{self._WEBPAGE_BASE_URL}/{channel_name}/video/{content_code}', NiconicoChannelPlusIE) + + +class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE): + IE_NAME = 'NiconicoChannelPlus:channel:videos' + IE_DESC = 'ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos' + _VALID_URL = r'https?://nicochannel\.jp/(?P[a-z\d\._-]+)/videos(?:\?.*)?' + _TESTS = [{ + # query: None + 'url': 'https://nicochannel.jp/testman/videos', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: None + 'url': 'https://nicochannel.jp/testtarou/videos', + 'info_dict': { + 'id': 'testtarou-videos', + 'title': 'チャンネルプラステスト太郎-videos', + }, + 'playlist_mincount': 2, + }, { + # query: None + 'url': 'https://nicochannel.jp/testjirou/videos', + 'info_dict': { + 'id': 'testjirou-videos', + 'title': 'チャンネルプラステスト二郎-videos', + }, + 'playlist_mincount': 12, + }, { + # query: tag + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }, { + # query: vodType + 'url': 'https://nicochannel.jp/testman/videos?vodType=1', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: sort + 'url': 'https://nicochannel.jp/testman/videos?sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: tag, vodType + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }, { + # query: tag, sort + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }, { + # query: vodType, sort + 'url': 'https://nicochannel.jp/testman/videos?vodType=1&sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 18, + }, { + # query: tag, vodType, sort + 'url': 'https://nicochannel.jp/testman/videos?tag=%E6%A4%9C%E8%A8%BC%E7%94%A8&vodType=1&sort=-released_at', + 'info_dict': { + 'id': 'testman-videos', + 'title': '本番チャンネルプラステストマン-videos', + }, + 'playlist_mincount': 6, + }] + + def _real_extract(self, url): + """ + API parameters: + sort: + -released_at 公開日が新しい順 (newest to oldest) + released_at 公開日が古い順 (oldest to newest) + -number_of_vod_views 再生数が多い順 (most play count) + number_of_vod_views コメントが多い順 (most comments) + vod_type (is "vodType" in "url"): + 0 すべて (all) + 1 会員限定 (members only) + 2 一部無料 (partially free) + 3 レンタル (rental) + 4 生放送アーカイブ (live archives) + 5 アップロード動画 (uploaded videos) + """ + + channel_id = self._match_id(url) + fanclub_site_id = self._find_fanclub_site_id(channel_id) + channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name') + qs = parse_qs(url) + + return self.playlist_result( + OnDemandPagedList( + functools.partial( + self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/video_pages', + filter_dict({ + 'tag': traverse_obj(qs, ('tag', 0)), + 'sort': traverse_obj(qs, ('sort', 0), default='-released_at'), + 'vod_type': traverse_obj(qs, ('vodType', 0), default='0'), + }), + channel_id, f'{channel_id}/videos'), + self._PAGE_SIZE), + playlist_id=f'{channel_id}-videos', playlist_title=f'{channel_name}-videos') + + +class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE): + IE_NAME = 'NiconicoChannelPlus:channel:lives' + IE_DESC = 'ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives' + _VALID_URL = r'https?://nicochannel\.jp/(?P[a-z\d\._-]+)/lives' + _TESTS = [{ + 'url': 'https://nicochannel.jp/testman/lives', + 'info_dict': { + 'id': 'testman-lives', + 'title': '本番チャンネルプラステストマン-lives', + }, + 'playlist_mincount': 18, + }, { + 'url': 'https://nicochannel.jp/testtarou/lives', + 'info_dict': { + 'id': 'testtarou-lives', + 'title': 'チャンネルプラステスト太郎-lives', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://nicochannel.jp/testjirou/lives', + 'info_dict': { + 'id': 'testjirou-lives', + 'title': 'チャンネルプラステスト二郎-lives', + }, + 'playlist_mincount': 6, + }] + + def _real_extract(self, url): + """ + API parameters: + live_type: + 1 放送中 (on air) + 2 放送予定 (scheduled live streams, oldest to newest) + 3 過去の放送 - すべて (all ended live streams, newest to oldest) + 4 過去の放送 - 生放送アーカイブ (all archives for live streams, oldest to newest) + We use "4" instead of "3" because some recently ended live streams could not be downloaded. + """ + + channel_id = self._match_id(url) + fanclub_site_id = self._find_fanclub_site_id(channel_id) + channel_name = self._get_channel_base_info(fanclub_site_id).get('fanclub_site_name') + + return self.playlist_result( + OnDemandPagedList( + functools.partial( + self._fetch_paged_channel_video_list, f'fanclub_sites/{fanclub_site_id}/live_pages', + { + 'live_type': 4, + }, + channel_id, f'{channel_id}/lives'), + self._PAGE_SIZE), + playlist_id=f'{channel_id}-lives', playlist_title=f'{channel_name}-lives') diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 8bd3fd472..bd0c4ebe3 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -6,7 +6,6 @@ from ..utils import ( determine_ext, int_or_none, js_to_json, - qualities, traverse_obj, unified_strdate, url_or_none, @@ -49,77 +48,52 @@ class NovaEmbedIE(InfoExtractor): duration = None formats = [] - player = self._parse_json( - self._search_regex( - (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P{.*?})\s*\)(?:\s*\))?\s*,', - r'Player\.init\s*\([^,]+,(?P\s*\w+\s*\?)?\s*(?P{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'), - webpage, 'player', default='{}', group='json'), video_id, fatal=False) - if player: - for format_id, format_list in player['tracks'].items(): - if not isinstance(format_list, list): - format_list = [format_list] - for format_dict in format_list: - if not isinstance(format_dict, dict): - continue - if (not self.get_param('allow_unplayable_formats') - and traverse_obj(format_dict, ('drm', 'keySystem'))): - has_drm = True - continue - format_url = url_or_none(format_dict.get('src')) - format_type = format_dict.get('type') - ext = determine_ext(format_url) - if (format_type == 'application/x-mpegURL' - or format_id == 'HLS' or ext == 'm3u8'): - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - elif (format_type == 'application/dash+xml' - or format_id == 'DASH' or ext == 'mpd'): - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - else: - formats.append({ - 'url': format_url, - }) - duration = int_or_none(player.get('duration')) - else: - # Old path, not actual as of 08.04.2020 - bitrates = self._parse_json( - self._search_regex( - r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), - video_id, transform_source=js_to_json) - - QUALITIES = ('lq', 'mq', 'hq', 'hd') - quality_key = qualities(QUALITIES) - - for format_id, format_list in bitrates.items(): - if not isinstance(format_list, list): - format_list = [format_list] - for format_url in format_list: - format_url = url_or_none(format_url) - if not format_url: - continue - if format_id == 'hls': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - continue - f = { + def process_format_list(format_list, format_id=""): + nonlocal formats, has_drm + if not isinstance(format_list, list): + format_list = [format_list] + for format_dict in format_list: + if not isinstance(format_dict, dict): + continue + if (not self.get_param('allow_unplayable_formats') + and traverse_obj(format_dict, ('drm', 'keySystem'))): + has_drm = True + continue + format_url = url_or_none(format_dict.get('src')) + format_type = format_dict.get('type') + ext = determine_ext(format_url) + if (format_type == 'application/x-mpegURL' + or format_id == 'HLS' or ext == 'm3u8'): + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + elif (format_type == 'application/dash+xml' + or format_id == 'DASH' or ext == 'mpd'): + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ 'url': format_url, - } - f_id = format_id - for quality in QUALITIES: - if '%s.mp4' % quality in format_url: - f_id += '-%s' % quality - f.update({ - 'quality': quality_key(quality), - 'format_note': quality.upper(), - }) - break - f['format_id'] = f_id - formats.append(f) + }) + + player = self._search_json( + r'player:', webpage, 'player', video_id, fatal=False, end_pattern=r';\s*') + if player: + for src in traverse_obj(player, ('lib', 'source', 'sources', ...)): + process_format_list(src) + duration = traverse_obj(player, ('sourceInfo', 'duration', {int_or_none})) + if not formats and not has_drm: + # older code path, in use before August 2023 + player = self._parse_json( + self._search_regex( + (r'(?:(?:replacePlaceholders|processAdTagModifier).*?:\s*)?(?:replacePlaceholders|processAdTagModifier)\s*\(\s*(?P{.*?})\s*\)(?:\s*\))?\s*,', + r'Player\.init\s*\([^,]+,(?P\s*\w+\s*\?)?\s*(?P{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)'), + webpage, 'player', group='json'), video_id) + if player: + for format_id, format_list in player['tracks'].items(): + process_format_list(format_list, format_id) + duration = int_or_none(player.get('duration')) if not formats and has_drm: self.report_drm(video_id) diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index d1fc058b9..41f591b09 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -146,7 +146,6 @@ class PlayVidsIE(PeekVidsBaseIE): 'uploader': 'Brazzers', 'age_limit': 18, 'view_count': int, - 'age_limit': 18, 'categories': list, 'tags': list, }, diff --git a/yt_dlp/extractor/piaulizaportal.py b/yt_dlp/extractor/piaulizaportal.py new file mode 100644 index 000000000..1eb6d92b7 --- /dev/null +++ b/yt_dlp/extractor/piaulizaportal.py @@ -0,0 +1,70 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, + time_seconds, + traverse_obj, +) + + +class PIAULIZAPortalIE(InfoExtractor): + IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM' + _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _TESTS = [{ + 'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44', + 'info_dict': { + 'id': '005f18b7-e810-5618-cb82-0987c5755d44', + 'title': 'プレゼンテーションプレイヤーのサンプル', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }, { + 'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1', + 'info_dict': { + 'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d', + 'title': '【確認用】視聴サンプルページ(ULIZA)', + 'live_status': 'not_live', + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0))) + if expires and expires <= time_seconds(): + raise ExtractorError('The link is expired.', video_id=video_id, expected=True) + + webpage = self._download_webpage(url, video_id) + + player_data = self._download_webpage( + self._search_regex( + r'