Merge remote-tracking branch 'origin' into yt-live-from-start-range

This commit is contained in:
Elyse 2023-06-24 14:30:12 -06:00
commit 99e6074c5d
81 changed files with 2914 additions and 973 deletions

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting that yt-dlp is broken on a **supported** site
required: true
- label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -64,7 +64,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -72,8 +72,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.03.04, Current version: 2023.03.04
yt-dlp is up to date (2023.03.04)
Latest version: 2023.06.22, Current version: 2023.06.22
yt-dlp is up to date (2023.06.22)
<more lines>
render: shell
validations:

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting a new site support request
required: true
- label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -76,7 +76,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -84,8 +84,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.03.04, Current version: 2023.03.04
yt-dlp is up to date (2023.03.04)
Latest version: 2023.06.22, Current version: 2023.06.22
yt-dlp is up to date (2023.06.22)
<more lines>
render: shell
validations:

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm requesting a site-specific feature
required: true
- label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -72,7 +72,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -80,8 +80,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.03.04, Current version: 2023.03.04
yt-dlp is up to date (2023.03.04)
Latest version: 2023.06.22, Current version: 2023.06.22
yt-dlp is up to date (2023.06.22)
<more lines>
render: shell
validations:

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting a bug unrelated to a specific site
required: true
- label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -57,7 +57,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -65,8 +65,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.03.04, Current version: 2023.03.04
yt-dlp is up to date (2023.03.04)
Latest version: 2023.06.22, Current version: 2023.06.22
yt-dlp is up to date (2023.06.22)
<more lines>
render: shell
validations:

View file

@ -20,7 +20,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
required: true
@ -53,7 +53,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -61,7 +61,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.03.04, Current version: 2023.03.04
yt-dlp is up to date (2023.03.04)
Latest version: 2023.06.22, Current version: 2023.06.22
yt-dlp is up to date (2023.06.22)
<more lines>
render: shell

View file

@ -26,7 +26,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **2023.03.04** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
required: true
@ -59,7 +59,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.03.04 [9d339c4] (win32_exe)
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
@ -67,7 +67,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.03.04, Current version: 2023.03.04
yt-dlp is up to date (2023.03.04)
Latest version: 2023.06.22, Current version: 2023.06.22
yt-dlp is up to date (2023.06.22)
<more lines>
render: shell

View file

@ -1,20 +0,0 @@
name: Potential Duplicates
on:
issues:
types: [opened, edited]
jobs:
run:
runs-on: ubuntu-latest
steps:
- uses: wow-actions/potential-duplicates@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
label: potential-duplicate
state: all
threshold: 0.7
comment: |
This issue is potentially a duplicate of one of the following issues:
{{#issues}}
- #{{ number }} ({{ accuracy }}%)
{{/issues}}

View file

@ -409,3 +409,54 @@ Hill-98
LXYan2333
mushbite
venkata-krishnas
7vlad7
alexklapheke
arobase-che
bepvte
bergoid
blmarket
brandon-dacrib
c-basalt
CoryTibbettsDev
Cyberes
D0LLYNH0
danog
DataGhost
falbrechtskirchinger
foreignBlade
garret1317
hasezoey
hoaluvn
ItzMaxTV
ivanskodje
jo-nike
kangalio
linsui
makew0rld
menschel
mikf
mrscrapy
NDagestad
Neurognostic
NextFire
nick-cd
permunkle
pzhlkj6612
ringus1
rjy
Schmoaaaaah
sjthespian
theperfectpunk
toomyzoom
truedread
TxI5
unbeatable-101
vampirefrog
vidiot720
viktor-enzell
zhgwn
barthelmannk
berkanteber
OverlordQ
rexlambert22
Ti4eeT4e

View file

@ -4,6 +4,315 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
### 2023.06.22
#### Core changes
- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan)
- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan)
- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan)
#### Extractor changes
- [Support multiple `_VALID_URL`s](https://github.com/yt-dlp/yt-dlp/commit/5fd8367496b42c7b900b896a0d5460561a2859de) ([#5812](https://github.com/yt-dlp/yt-dlp/issues/5812)) by [nixxo](https://github.com/nixxo)
- **dplay**: GlobalCyclingNetworkPlus: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/774aa09dd6aa61ced9ec818d1f67e53414d22762) ([#7360](https://github.com/yt-dlp/yt-dlp/issues/7360)) by [bashonly](https://github.com/bashonly)
- **dropout**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/db22142f6f817ff673d417b4b78e8db497bf8ab3) ([#7304](https://github.com/yt-dlp/yt-dlp/issues/7304)) by [OverlordQ](https://github.com/OverlordQ)
- **motherless**: [Add gallery support, fix groups](https://github.com/yt-dlp/yt-dlp/commit/f2ff0f6f1914b82d4a51681a72cc0828115dcb4a) ([#7211](https://github.com/yt-dlp/yt-dlp/issues/7211)) by [rexlambert22](https://github.com/rexlambert22), [Ti4eeT4e](https://github.com/Ti4eeT4e)
- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb)
- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk)
- **youtube**
- [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142)
    - iOS is affected neither by 403 nor by nsig, so it helps mitigate them preemptively
    - iOS also has higher bit-rate 'premium' formats, though they are not labeled as such
- [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan)
- [Improve nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/cd810afe2ac5567c822b7424800fc470ef2d0045) by [pukkandan](https://github.com/pukkandan)
- [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan)
#### Misc. changes
- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700)
- **cleanup**
- Miscellaneous
- [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)
- [812cdfa](https://github.com/yt-dlp/yt-dlp/commit/812cdfa06c33a40e73a8e04b3e6f42c084666a43) by [pukkandan](https://github.com/pukkandan)
### 2023.06.21
#### Important changes
- YouTube: Improved throttling and signature fixes
#### Core changes
- [Add `--compat-option playlist-match-filter`](https://github.com/yt-dlp/yt-dlp/commit/93b39cdbd9dcf351bfa0c4ee252805b4617fdca9) by [pukkandan](https://github.com/pukkandan)
- [Add `--no-quiet`](https://github.com/yt-dlp/yt-dlp/commit/d669772c65e8630162fd6555d0a578b246591921) by [pukkandan](https://github.com/pukkandan)
- [Add option `--color`](https://github.com/yt-dlp/yt-dlp/commit/8417f26b8a819cd7ffcd4e000ca3e45033e670fb) ([#6904](https://github.com/yt-dlp/yt-dlp/issues/6904)) by [Grub4K](https://github.com/Grub4K)
- [Add option `--netrc-cmd`](https://github.com/yt-dlp/yt-dlp/commit/db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb) ([#6682](https://github.com/yt-dlp/yt-dlp/issues/6682)) by [NDagestad](https://github.com/NDagestad), [pukkandan](https://github.com/pukkandan)
- [Add option `--xff`](https://github.com/yt-dlp/yt-dlp/commit/c16644642b08e2bf4130a6c5fa01395d8718c990) by [pukkandan](https://github.com/pukkandan)
- [Auto-select default format in `-f-`](https://github.com/yt-dlp/yt-dlp/commit/372a0f3b9dadd1e52234b498aa4c7040ef868c7d) ([#7101](https://github.com/yt-dlp/yt-dlp/issues/7101)) by [ivanskodje](https://github.com/ivanskodje), [pukkandan](https://github.com/pukkandan)
- [Deprecate internal `Youtubedl-no-compression` header](https://github.com/yt-dlp/yt-dlp/commit/955c89584b66fcd0fcfab3e611f1edeb1ca63886) ([#6876](https://github.com/yt-dlp/yt-dlp/issues/6876)) by [coletdjnz](https://github.com/coletdjnz)
- [Do not translate newlines in `--print-to-file`](https://github.com/yt-dlp/yt-dlp/commit/9874e82b5a61582169300bea561b3e8899ad1ef7) by [pukkandan](https://github.com/pukkandan)
- [Ensure pre-processor errors do not block `--print`](https://github.com/yt-dlp/yt-dlp/commit/f005a35aa7e4f67a0c603a946c0dd714c151b2d6) by [pukkandan](https://github.com/pukkandan) (With fixes in [17ba434](https://github.com/yt-dlp/yt-dlp/commit/17ba4343cf99701692a7f4798fd42b50f644faba))
- [Fix `filepath` being copied to underlying format dict](https://github.com/yt-dlp/yt-dlp/commit/84078a8b38f403495d00b46654c8750774d821de) by [pukkandan](https://github.com/pukkandan)
- [Improve HTTP redirect handling](https://github.com/yt-dlp/yt-dlp/commit/08916a49c777cb6e000eec092881eb93ec22076c) ([#7094](https://github.com/yt-dlp/yt-dlp/issues/7094)) by [coletdjnz](https://github.com/coletdjnz)
- [Populate `filename` and `urls` fields at all stages of `--print`](https://github.com/yt-dlp/yt-dlp/commit/170605840ea9d5ad75da6576485ea7d125b428ee) by [pukkandan](https://github.com/pukkandan) (With fixes in [b5f61b6](https://github.com/yt-dlp/yt-dlp/commit/b5f61b69d4561b81fc98c226b176f0c15493e688))
- [Relaxed validation for numeric format filters](https://github.com/yt-dlp/yt-dlp/commit/c3f624ef0a5d7a6ae1c5ffeb243087e9fc7d79dc) by [pukkandan](https://github.com/pukkandan)
- [Support decoding multiple content encodings](https://github.com/yt-dlp/yt-dlp/commit/daafbf49b3482edae4d70dd37070be99742a926e) ([#7142](https://github.com/yt-dlp/yt-dlp/issues/7142)) by [coletdjnz](https://github.com/coletdjnz)
- [Support loading info.json with a list at its root](https://github.com/yt-dlp/yt-dlp/commit/ab1de9cb1e39cf421c2b7dc6756c6ff1955bb313) by [pukkandan](https://github.com/pukkandan)
- [Workaround erroneous urllib Windows proxy parsing](https://github.com/yt-dlp/yt-dlp/commit/3f66b6fe50f8d5b545712f8b19d5ae62f5373980) ([#7092](https://github.com/yt-dlp/yt-dlp/issues/7092)) by [coletdjnz](https://github.com/coletdjnz)
- **cookies**
- [Defer extraction of v11 key from keyring](https://github.com/yt-dlp/yt-dlp/commit/9b7a48abd1b187eae1e3f6c9839c47d43ccec00b) by [Grub4K](https://github.com/Grub4K)
- [Move `YoutubeDLCookieJar` to cookies module](https://github.com/yt-dlp/yt-dlp/commit/b87e01c123fd560b6a674ce00f45a9459d82d98a) ([#7091](https://github.com/yt-dlp/yt-dlp/issues/7091)) by [coletdjnz](https://github.com/coletdjnz)
- [Support custom Safari cookies path](https://github.com/yt-dlp/yt-dlp/commit/a58182b75a05fe0a10c5e94a536711d3ade19c20) ([#6783](https://github.com/yt-dlp/yt-dlp/issues/6783)) by [NextFire](https://github.com/NextFire)
- [Update for chromium changes](https://github.com/yt-dlp/yt-dlp/commit/b38d4c941d1993ab27e4c0f8e024e23c2ec0f8f8) ([#6897](https://github.com/yt-dlp/yt-dlp/issues/6897)) by [mbway](https://github.com/mbway)
- **Cryptodome**: [Fix `__bool__`](https://github.com/yt-dlp/yt-dlp/commit/98ac902c4979e4529b166e873473bef42baa2e3e) by [pukkandan](https://github.com/pukkandan)
- **jsinterp**
- [Do not compile regex](https://github.com/yt-dlp/yt-dlp/commit/7aeda6cc9e73ada0b0a0b6a6748c66bef63a20a8) by [pukkandan](https://github.com/pukkandan)
- [Fix division](https://github.com/yt-dlp/yt-dlp/commit/b4a252fba81f53631c07ca40ce7583f5d19a8a36) ([#7279](https://github.com/yt-dlp/yt-dlp/issues/7279)) by [bashonly](https://github.com/bashonly)
- [Fix global object extraction](https://github.com/yt-dlp/yt-dlp/commit/01aba2519a0884ef17d5f85608dbd2a455577147) by [pukkandan](https://github.com/pukkandan)
- [Handle `NaN` in bitwise operators](https://github.com/yt-dlp/yt-dlp/commit/1d7656184c6b8aa46b29149893894b3c24f1df00) by [pukkandan](https://github.com/pukkandan)
- [Handle negative numbers better](https://github.com/yt-dlp/yt-dlp/commit/7cf51f21916292cd80bdeceb37489f5322f166dd) by [pukkandan](https://github.com/pukkandan)
- **outtmpl**
- [Allow `\n` in replacements and default](https://github.com/yt-dlp/yt-dlp/commit/78fde6e3398ff11e5d383a66b28664badeab5180) by [pukkandan](https://github.com/pukkandan)
- [Fix some minor bugs](https://github.com/yt-dlp/yt-dlp/commit/ebe1b4e34f43c3acad30e4bcb8484681a030c114) by [pukkandan](https://github.com/pukkandan) (With fixes in [1619ab3](https://github.com/yt-dlp/yt-dlp/commit/1619ab3e67d8dc4f86fc7ed292c79345bc0d91a0))
- [Support `str.format` syntax inside replacements](https://github.com/yt-dlp/yt-dlp/commit/ec9311c41b111110bc52cfbd6ea682c6fb23f77a) by [pukkandan](https://github.com/pukkandan)
- **update**
- [Better error handling](https://github.com/yt-dlp/yt-dlp/commit/d2e84d5eb01c66fc5304e8566348d65a7be24ed7) by [pukkandan](https://github.com/pukkandan)
- [Do not restart into versions without `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/02948a17d903f544363bb20b51a6d8baed7bba08) by [pukkandan](https://github.com/pukkandan)
- [Implement `--update-to` repo](https://github.com/yt-dlp/yt-dlp/commit/665472a7de3880578c0b7b3f95c71570c056368e) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
- **upstream**
- [Merged with youtube-dl 07af47](https://github.com/yt-dlp/yt-dlp/commit/42f2d40b475db66486a4b4fe5b56751a640db5db) by [pukkandan](https://github.com/pukkandan)
- [Merged with youtube-dl d1c6c5](https://github.com/yt-dlp/yt-dlp/commit/4823ec9f461512daa1b8ab362893bb86a6320b26) by [pukkandan](https://github.com/pukkandan) (With fixes in [edbe5b5](https://github.com/yt-dlp/yt-dlp/commit/edbe5b589dd0860a67b4e03f58db3cd2539d91c2) by [bashonly](https://github.com/bashonly))
- **utils**
- `FormatSorter`: [Improve `size` and `br`](https://github.com/yt-dlp/yt-dlp/commit/eedda5252c05327748dede204a8fccafa0288118) by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
- `js_to_json`: [Implement template strings](https://github.com/yt-dlp/yt-dlp/commit/0898c5c8ccadfc404472456a7a7751b72afebadd) ([#6623](https://github.com/yt-dlp/yt-dlp/issues/6623)) by [Grub4K](https://github.com/Grub4K)
- `locked_file`: [Fix for virtiofs](https://github.com/yt-dlp/yt-dlp/commit/45998b3e371b819ce0dbe50da703809a048cc2fe) ([#6840](https://github.com/yt-dlp/yt-dlp/issues/6840)) by [brandon-dacrib](https://github.com/brandon-dacrib)
- `strftime_or_none`: [Handle negative timestamps](https://github.com/yt-dlp/yt-dlp/commit/a35af4306d24c56c6358f89cdf204860d1cd62b4) by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
- `traverse_obj`
- [Allow iterables in traversal](https://github.com/yt-dlp/yt-dlp/commit/21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e) ([#6902](https://github.com/yt-dlp/yt-dlp/issues/6902)) by [Grub4K](https://github.com/Grub4K)
- [More fixes](https://github.com/yt-dlp/yt-dlp/commit/b079c26f0af8085bccdadc72c61c8164ca5ab0f8) ([#6959](https://github.com/yt-dlp/yt-dlp/issues/6959)) by [Grub4K](https://github.com/Grub4K)
- `write_string`: [Fix noconsole behavior](https://github.com/yt-dlp/yt-dlp/commit/3b479100df02e20dd949e046003ae96ddbfced57) by [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Do not exit early for unsuitable `url_result`](https://github.com/yt-dlp/yt-dlp/commit/baa922b5c74b10e3b86ff5e6cf6529b3aae8efab) by [pukkandan](https://github.com/pukkandan)
- [Do not warn for invalid chapter data in description](https://github.com/yt-dlp/yt-dlp/commit/84ffeb7d5e72e3829319ba7720a8480fc4c7503b) by [pukkandan](https://github.com/pukkandan)
- [Extract more metadata from ISM](https://github.com/yt-dlp/yt-dlp/commit/f68434cc74cfd3db01b266476a2eac8329fbb267) by [pukkandan](https://github.com/pukkandan)
- **abematv**: [Add fallback for title and description extraction and extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/c449c0655d7c8549e6e1389c26b628053b253d39) ([#6994](https://github.com/yt-dlp/yt-dlp/issues/6994)) by [Lesmiscore](https://github.com/Lesmiscore)
- **acast**: [Support embeds](https://github.com/yt-dlp/yt-dlp/commit/c91ac833ea99b00506e470a44cf930e4e23378c9) ([#7212](https://github.com/yt-dlp/yt-dlp/issues/7212)) by [pabs3](https://github.com/pabs3)
- **adobepass**: [Handle `Charter_Direct` MSO as `Spectrum`](https://github.com/yt-dlp/yt-dlp/commit/ea0570820336a0fe9c3b530d1b0d1e59313274f4) ([#6824](https://github.com/yt-dlp/yt-dlp/issues/6824)) by [bashonly](https://github.com/bashonly)
- **aeonco**: [Support Youtube embeds](https://github.com/yt-dlp/yt-dlp/commit/ed81b74802b4247ee8d9dc0ef87eb52baefede1c) ([#6591](https://github.com/yt-dlp/yt-dlp/issues/6591)) by [alexklapheke](https://github.com/alexklapheke)
- **afreecatv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fdd69db38924c38194ef236b26325d66ac815c88) ([#6283](https://github.com/yt-dlp/yt-dlp/issues/6283)) by [blmarket](https://github.com/blmarket)
- **ARDBetaMediathek**: [Add thumbnail](https://github.com/yt-dlp/yt-dlp/commit/f78eb41e1c0f1dcdb10317358a26bf541dc7ee15) ([#6890](https://github.com/yt-dlp/yt-dlp/issues/6890)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
- **bibeltv**: [Fix extraction, support live streams and series](https://github.com/yt-dlp/yt-dlp/commit/4ad58667c102bd82a7c4cca8aa395ec1682e3b4c) ([#6505](https://github.com/yt-dlp/yt-dlp/issues/6505)) by [flashdagger](https://github.com/flashdagger)
- **bilibili**
- [Support festival videos](https://github.com/yt-dlp/yt-dlp/commit/ab29e47029e2f5b48abbbab78e82faf7cf6e9506) ([#6547](https://github.com/yt-dlp/yt-dlp/issues/6547)) by [qbnu](https://github.com/qbnu)
- SpaceVideo: [Extract signature](https://github.com/yt-dlp/yt-dlp/commit/6f10cdcf7eeaeae5b75e0a4428cd649c156a2d83) ([#7149](https://github.com/yt-dlp/yt-dlp/issues/7149)) by [elyse0](https://github.com/elyse0)
- **biliIntl**: [Add comment extraction](https://github.com/yt-dlp/yt-dlp/commit/b093c38cc9f26b59a8504211d792f053142c847d) ([#6079](https://github.com/yt-dlp/yt-dlp/issues/6079)) by [HobbyistDev](https://github.com/HobbyistDev)
- **bitchute**: [Add more fallback subdomains](https://github.com/yt-dlp/yt-dlp/commit/0c4e0fbcade0fc92d14c2a6d63e360fe067f6192) ([#6907](https://github.com/yt-dlp/yt-dlp/issues/6907)) by [Neurognostic](https://github.com/Neurognostic)
- **booyah**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f7f7a877bf8e87fd4eb0ad2494ad948ca7691114) by [pukkandan](https://github.com/pukkandan)
- **BrainPOP**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/979568f26ece80bca72b48f0dd57d676e431059a) ([#6106](https://github.com/yt-dlp/yt-dlp/issues/6106)) by [MinePlayersPE](https://github.com/MinePlayersPE)
- **bravotv**
- [Detect DRM](https://github.com/yt-dlp/yt-dlp/commit/1fe5bf240e6ade487d18079a62aa36bcc440a27a) ([#7171](https://github.com/yt-dlp/yt-dlp/issues/7171)) by [bashonly](https://github.com/bashonly)
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/06966cb8966b9aa4f60ab9c44c182a057d4ca3a3) ([#6568](https://github.com/yt-dlp/yt-dlp/issues/6568)) by [bashonly](https://github.com/bashonly)
- **camfm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4cbfa570a1b9bd65b0f48770693377e8d842dcb0) ([#7083](https://github.com/yt-dlp/yt-dlp/issues/7083)) by [garret1317](https://github.com/garret1317)
- **cbc**
- [Fix live extractor, playlist `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/7a7b1376fbce0067cf37566bb47131bc0022638d) ([#6625](https://github.com/yt-dlp/yt-dlp/issues/6625)) by [makew0rld](https://github.com/makew0rld)
- [Ignore 426 from API](https://github.com/yt-dlp/yt-dlp/commit/4afb208cf07b59291ae3b0c4efc83945ee5b8812) ([#6781](https://github.com/yt-dlp/yt-dlp/issues/6781)) by [jo-nike](https://github.com/jo-nike)
- gem: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/871c907454693940cb56906ed9ea49fcb7154829) ([#6499](https://github.com/yt-dlp/yt-dlp/issues/6499)) by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
- **cbs**: [Add `ParamountPressExpress` extractor](https://github.com/yt-dlp/yt-dlp/commit/44369c9afa996e14e9f466754481d878811b5b4a) ([#6604](https://github.com/yt-dlp/yt-dlp/issues/6604)) by [bashonly](https://github.com/bashonly)
- **cbsnews**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/f6e43d6fa9804c24525e1fed0a87782754dab7ed) ([#6681](https://github.com/yt-dlp/yt-dlp/issues/6681)) by [bashonly](https://github.com/bashonly)
- **chilloutzone**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6f4fc5660f40f3458882a8f51601eae4af7be609) ([#6445](https://github.com/yt-dlp/yt-dlp/issues/6445)) by [bashonly](https://github.com/bashonly)
- **clipchamp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f07c4c1da4361af213e5791279b9d152d2e4ce3) ([#6978](https://github.com/yt-dlp/yt-dlp/issues/6978)) by [bashonly](https://github.com/bashonly)
- **comedycentral**: [Add support for movies](https://github.com/yt-dlp/yt-dlp/commit/66468bbf49562ff82670cbbd456c5e8448a6df34) ([#7108](https://github.com/yt-dlp/yt-dlp/issues/7108)) by [sqrtNOT](https://github.com/sqrtNOT)
- **crtvg**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/26c517b29c8727e47948d6fff749d5297f0efb60) ([#7168](https://github.com/yt-dlp/yt-dlp/issues/7168)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **crunchyroll**: [Rework with support for movies, music and artists](https://github.com/yt-dlp/yt-dlp/commit/032de83ea9ff2f4977d9c71a93bbc1775597b762) ([#6237](https://github.com/yt-dlp/yt-dlp/issues/6237)) by [Grub4K](https://github.com/Grub4K)
- **dacast**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/c25cac2f8e5fbac2737a426d7778fd2f0efc5381) ([#6896](https://github.com/yt-dlp/yt-dlp/issues/6896)) by [bashonly](https://github.com/bashonly)
- **daftsex**: [Update domain and embed player url](https://github.com/yt-dlp/yt-dlp/commit/fc5a7f9b27d2a89b1f3ca7d33a95301c21d832cd) ([#5966](https://github.com/yt-dlp/yt-dlp/issues/5966)) by [JChris246](https://github.com/JChris246)
- **DigitalConcertHall**: [Support films](https://github.com/yt-dlp/yt-dlp/commit/55ed4ff73487feb3177b037dfc2ea527e777da3e) ([#7202](https://github.com/yt-dlp/yt-dlp/issues/7202)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **discogs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6daaf21092888beff11b807cd46f832f1f9c46a0) ([#6624](https://github.com/yt-dlp/yt-dlp/issues/6624)) by [rjy](https://github.com/rjy)
- **dlf**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b423b6a48e0b19260bc95ab7d72d2138d7f124dc) ([#6697](https://github.com/yt-dlp/yt-dlp/issues/6697)) by [nick-cd](https://github.com/nick-cd)
- **drtv**: [Fix radio page extraction](https://github.com/yt-dlp/yt-dlp/commit/9a06b7b1891b48cebbe275652ae8025a36d97d97) ([#6552](https://github.com/yt-dlp/yt-dlp/issues/6552)) by [viktor-enzell](https://github.com/viktor-enzell)
- **Dumpert**: [Fix m3u8 and support new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/f8ae441501596733e2b967430471643a1d7cacb8) ([#6091](https://github.com/yt-dlp/yt-dlp/issues/6091)) by [DataGhost](https://github.com/DataGhost), [pukkandan](https://github.com/pukkandan)
- **elevensports**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ecfe47973f6603b5367fe2cc3c65274627d94516) ([#7172](https://github.com/yt-dlp/yt-dlp/issues/7172)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **ettutv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/83465fc4100a2fb2c188898fbc2f3021f6a9b4dd) ([#6579](https://github.com/yt-dlp/yt-dlp/issues/6579)) by [elyse0](https://github.com/elyse0)
- **europarl**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/03789976d301eaed3e957dbc041573098f6af059) ([#7114](https://github.com/yt-dlp/yt-dlp/issues/7114)) by [HobbyistDev](https://github.com/HobbyistDev)
- **eurosport**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/45e87ea106ad37b2a002663fa30ee41ce97b16cd) ([#7076](https://github.com/yt-dlp/yt-dlp/issues/7076)) by [HobbyistDev](https://github.com/HobbyistDev)
- **facebook**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/3b52a606881e6adadc33444abdeacce562b79330) ([#6856](https://github.com/yt-dlp/yt-dlp/issues/6856)) by [ringus1](https://github.com/ringus1)
- **foxnews**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/97d60ad8cd6c99f01e463a9acfce8693aff2a609) ([#7222](https://github.com/yt-dlp/yt-dlp/issues/7222)) by [bashonly](https://github.com/bashonly)
- **funker530**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cab94a0cd8b6d3fffed5a6faff030274adbed182) ([#7291](https://github.com/yt-dlp/yt-dlp/issues/7291)) by [Cyberes](https://github.com/Cyberes)
- **generic**
- [Accept values for `fragment_query`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/5cc0a8fd2e9fec50026fb92170b57993af939e4a) ([#6600](https://github.com/yt-dlp/yt-dlp/issues/6600)) by [bashonly](https://github.com/bashonly) (With fixes in [9bfe0d1](https://github.com/yt-dlp/yt-dlp/commit/9bfe0d15bd7dbdc6b0e6378fa9f5e2e289b2373b))
- [Add extractor-args `hls_key`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/c2e0fc40a73dd85ab3920f977f579d475e66ef59) ([#6567](https://github.com/yt-dlp/yt-dlp/issues/6567)) by [bashonly](https://github.com/bashonly)
- [Attempt to detect live HLS](https://github.com/yt-dlp/yt-dlp/commit/93e7c6995e07dafb9dcc06c0d06acf6c5bdfecc5) ([#6775](https://github.com/yt-dlp/yt-dlp/issues/6775)) by [bashonly](https://github.com/bashonly)
- **genius**: [Add support for articles](https://github.com/yt-dlp/yt-dlp/commit/460da07439718d9af1e3661da2a23e05a913a2e6) ([#6474](https://github.com/yt-dlp/yt-dlp/issues/6474)) by [bashonly](https://github.com/bashonly)
- **globalplayer**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/30647668a92a0ca5cd108776804baac0996bd9f7) ([#6903](https://github.com/yt-dlp/yt-dlp/issues/6903)) by [garret1317](https://github.com/garret1317)
- **gmanetwork**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2d97d154fe4fb84fe2ed3a4e1ed5819e89b71e88) ([#5945](https://github.com/yt-dlp/yt-dlp/issues/5945)) by [HobbyistDev](https://github.com/HobbyistDev)
- **gronkh**: [Extract duration and chapters](https://github.com/yt-dlp/yt-dlp/commit/9c92b803fa24e48543ce969468d5404376e315b7) ([#6817](https://github.com/yt-dlp/yt-dlp/issues/6817)) by [satan1st](https://github.com/satan1st)
- **hentaistigma**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/04f8018a0544736a18494bc3899d06b05b78fae6) by [pukkandan](https://github.com/pukkandan)
- **hidive**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/e6ab678e36c40ded0aae305bbb866cdab554d417) by [pukkandan](https://github.com/pukkandan)
- **hollywoodreporter**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6bdb64e2a2a6d504d8ce1dc830fbfb8a7f199c63) ([#6614](https://github.com/yt-dlp/yt-dlp/issues/6614)) by [bashonly](https://github.com/bashonly)
- **hotstar**: [Support `/shows/` URLs](https://github.com/yt-dlp/yt-dlp/commit/7f8ddebbb51c9fd4a347306332a718ba41b371b8) ([#7225](https://github.com/yt-dlp/yt-dlp/issues/7225)) by [bashonly](https://github.com/bashonly)
- **hrefli**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7e35526d5b970a034b9d76215ee3e4bd7631edcd) ([#6762](https://github.com/yt-dlp/yt-dlp/issues/6762)) by [selfisekai](https://github.com/selfisekai)
- **idolplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c14b213679ed4401288bdc86ae696932e219222) ([#6732](https://github.com/yt-dlp/yt-dlp/issues/6732)) by [ping](https://github.com/ping)
- **iq**: [Set more language codes](https://github.com/yt-dlp/yt-dlp/commit/2d5cae9636714ff922d28c548c349d5f2b48f317) ([#6476](https://github.com/yt-dlp/yt-dlp/issues/6476)) by [D0LLYNH0](https://github.com/D0LLYNH0)
- **iwara**
- [Accept old URLs](https://github.com/yt-dlp/yt-dlp/commit/ab92d8651c48d247dfb7d3f0a824cc986e47c7ed) by [Lesmiscore](https://github.com/Lesmiscore)
- [Fix authentication](https://github.com/yt-dlp/yt-dlp/commit/0a5d7c39e17bb9bd50c9db42bcad40eb82d7f784) ([#7137](https://github.com/yt-dlp/yt-dlp/issues/7137)) by [toomyzoom](https://github.com/toomyzoom)
- [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/56793f74c36899742d7abd52afb0deca97d469e1) ([#6651](https://github.com/yt-dlp/yt-dlp/issues/6651)) by [hasezoey](https://github.com/hasezoey)
- [Fix typo](https://github.com/yt-dlp/yt-dlp/commit/d1483ec693c79f0b4ddf493870bcb840aca4da08) by [Lesmiscore](https://github.com/Lesmiscore)
- [Implement login](https://github.com/yt-dlp/yt-dlp/commit/21b9413cf7dd4830b2ece57af21589dd4538fc52) ([#6721](https://github.com/yt-dlp/yt-dlp/issues/6721)) by [toomyzoom](https://github.com/toomyzoom)
- [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/c14af7a741931b364bab3d9546c0f4359f318f8c) ([#6557](https://github.com/yt-dlp/yt-dlp/issues/6557)) by [Lesmiscore](https://github.com/Lesmiscore)
- [Report private videos](https://github.com/yt-dlp/yt-dlp/commit/95a383be1b6fb00c92ee3fb091732c4f6009acb6) ([#6641](https://github.com/yt-dlp/yt-dlp/issues/6641)) by [Lesmiscore](https://github.com/Lesmiscore)
- **JStream**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3459d3c5af3b2572ed51e8ecfda6c11022a838c6) ([#6252](https://github.com/yt-dlp/yt-dlp/issues/6252)) by [Lesmiscore](https://github.com/Lesmiscore)
- **jwplatform**: [Update `_extract_embed_urls`](https://github.com/yt-dlp/yt-dlp/commit/cf9fd52fabe71d6e7c30d3ea525029ffa561fc9c) ([#6383](https://github.com/yt-dlp/yt-dlp/issues/6383)) by [carusocr](https://github.com/carusocr)
- **kick**: [Make initial request non-fatal](https://github.com/yt-dlp/yt-dlp/commit/0a6918a4a1431960181d8c50e0bbbcb0afbaff9a) by [bashonly](https://github.com/bashonly)
- **LastFM**: [Rewrite playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/026435714cb7c39613a0d7d2acd15d3823b78d94) ([#6379](https://github.com/yt-dlp/yt-dlp/issues/6379)) by [hatienl0i261299](https://github.com/hatienl0i261299), [pukkandan](https://github.com/pukkandan)
- **lbry**: [Extract original quality formats](https://github.com/yt-dlp/yt-dlp/commit/44c0d66442b568d9e1359e669d8b029b08a77fa7) ([#7257](https://github.com/yt-dlp/yt-dlp/issues/7257)) by [bashonly](https://github.com/bashonly)
- **line**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/faa0332ed69e070cf3bd31390589a596e962f392) ([#6734](https://github.com/yt-dlp/yt-dlp/issues/6734)) by [sian1468](https://github.com/sian1468)
- **livestream**: [Support videos with account id](https://github.com/yt-dlp/yt-dlp/commit/bfdf144c7e5d7a93fbfa9d8e65598c72bf2b542a) ([#6324](https://github.com/yt-dlp/yt-dlp/issues/6324)) by [theperfectpunk](https://github.com/theperfectpunk)
- **medaltv**: [Fix clips](https://github.com/yt-dlp/yt-dlp/commit/1e3c2b6ec28d7ab5e31341fa93c47b65be4fbff4) ([#6502](https://github.com/yt-dlp/yt-dlp/issues/6502)) by [xenova](https://github.com/xenova)
- **mediastream**: [Improve `WinSports` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/03025b6e105139d01cd415ddc51fd692957fd2ba) ([#6426](https://github.com/yt-dlp/yt-dlp/issues/6426)) by [bashonly](https://github.com/bashonly)
- **mgtv**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/59d9fe08312bbb76ee26238d207a8ca35410a48d) ([#7234](https://github.com/yt-dlp/yt-dlp/issues/7234)) by [bashonly](https://github.com/bashonly)
- **Mzaalo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dc3c44f349ba85af320e706e2a27ad81a78b1c6e) ([#7163](https://github.com/yt-dlp/yt-dlp/issues/7163)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **nbc**: [Fix `NBCStations` direct mp4 formats](https://github.com/yt-dlp/yt-dlp/commit/9be0fe1fd967f62cbf3c60bd14e1021a70abc147) ([#6637](https://github.com/yt-dlp/yt-dlp/issues/6637)) by [bashonly](https://github.com/bashonly)
- **nebula**: [Add `beta.nebula.tv`](https://github.com/yt-dlp/yt-dlp/commit/cbfe2e5cbe0f4649a91e323a82b8f5f774f36662) ([#6516](https://github.com/yt-dlp/yt-dlp/issues/6516)) by [unbeatable-101](https://github.com/unbeatable-101)
- **nekohacker**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/489f51279d00318018478fd7461eddbe3b45297e) ([#7003](https://github.com/yt-dlp/yt-dlp/issues/7003)) by [hasezoey](https://github.com/hasezoey)
- **nhk**
- [Add `NhkRadiru` extractor](https://github.com/yt-dlp/yt-dlp/commit/8f0be90ecb3b8d862397177bb226f17b245ef933) ([#6819](https://github.com/yt-dlp/yt-dlp/issues/6819)) by [garret1317](https://github.com/garret1317)
- [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/f41b949a2ef646fbc36375febbe3f0c19d742c0f) ([#7180](https://github.com/yt-dlp/yt-dlp/issues/7180)) by [menschel](https://github.com/menschel), [sjthespian](https://github.com/sjthespian)
- `NhkRadiruLive`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/81c8b9bdd9841b72cbfc1bbff9dab5fb4aa038b0) ([#7332](https://github.com/yt-dlp/yt-dlp/issues/7332)) by [garret1317](https://github.com/garret1317)
- **niconico**
- [Download comments from the new endpoint](https://github.com/yt-dlp/yt-dlp/commit/52ecc33e221f7de7eb6fed6c22489f0c5fdd2c6d) ([#6773](https://github.com/yt-dlp/yt-dlp/issues/6773)) by [Lesmiscore](https://github.com/Lesmiscore)
- live: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8f9250fe280d37f0988646cd5cc0072f4d33a6d) ([#5764](https://github.com/yt-dlp/yt-dlp/issues/5764)) by [Lesmiscore](https://github.com/Lesmiscore)
- series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/c86e433c35fe5da6cb29f3539eef97497f84ed38) ([#6898](https://github.com/yt-dlp/yt-dlp/issues/6898)) by [sqrtNOT](https://github.com/sqrtNOT)
- **nubilesporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d4e6ef40772e0560a8ed33b844ef7549e86837be) ([#6231](https://github.com/yt-dlp/yt-dlp/issues/6231)) by [permunkle](https://github.com/permunkle)
- **odnoklassniki**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/1a2eb5bda51d8b7a78a65acebf72a0dcf9da196b) ([#7217](https://github.com/yt-dlp/yt-dlp/issues/7217)) by [bashonly](https://github.com/bashonly)
- **opencast**
- [Add ltitools to `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3588be59cee429a0ab5c4ceb2f162298bb44147d) ([#6371](https://github.com/yt-dlp/yt-dlp/issues/6371)) by [C0D3D3V](https://github.com/C0D3D3V)
- [Fix format bug](https://github.com/yt-dlp/yt-dlp/commit/89dbf0848370deaa55af88c3593a2a264124caf5) ([#6512](https://github.com/yt-dlp/yt-dlp/issues/6512)) by [C0D3D3V](https://github.com/C0D3D3V)
- **owncloud**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c6d4b82a8b8bce59b1c9ce5e6d349ea428dac0a7) ([#6533](https://github.com/yt-dlp/yt-dlp/issues/6533)) by [C0D3D3V](https://github.com/C0D3D3V)
- **Parler**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/80ea6d3dea8483cddd39fc89b5ee1fc06670c33c) ([#6446](https://github.com/yt-dlp/yt-dlp/issues/6446)) by [JChris246](https://github.com/JChris246)
- **pgatour**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae182ad89e1427ff7b1684d6a44ff93fa857a0c) ([#6613](https://github.com/yt-dlp/yt-dlp/issues/6613)) by [bashonly](https://github.com/bashonly)
- **playsuisse**: [Support new url format](https://github.com/yt-dlp/yt-dlp/commit/94627c5dde12a72766bdba36e056916c29c40ed1) ([#6528](https://github.com/yt-dlp/yt-dlp/issues/6528)) by [sbor23](https://github.com/sbor23)
- **polskieradio**: [Improve extractors](https://github.com/yt-dlp/yt-dlp/commit/738c90a463257634455ada3e5c18b714c531dede) ([#5948](https://github.com/yt-dlp/yt-dlp/issues/5948)) by [selfisekai](https://github.com/selfisekai)
- **pornez**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f) ([#6792](https://github.com/yt-dlp/yt-dlp/issues/6792)) by [zhgwn](https://github.com/zhgwn)
- **pornhub**: [Set access cookies to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/62beefa818c75c20b6941389bb197051554a5d41) ([#6685](https://github.com/yt-dlp/yt-dlp/issues/6685)) by [arobase-che](https://github.com/arobase-che), [Schmoaaaaah](https://github.com/Schmoaaaaah)
- **rai**: [Rewrite extractors](https://github.com/yt-dlp/yt-dlp/commit/c6d3f81a4077aaf9cffc6aa2d0dec92f38e74bb0) ([#5940](https://github.com/yt-dlp/yt-dlp/issues/5940)) by [danog](https://github.com/danog), [nixxo](https://github.com/nixxo)
- **recurbate**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2502cfed91415c7ccfff925fd3404d230046484) ([#6297](https://github.com/yt-dlp/yt-dlp/issues/6297)) by [mrscrapy](https://github.com/mrscrapy)
- **reddit**
- [Add login support](https://github.com/yt-dlp/yt-dlp/commit/4d9280c9c853733534dda60486fa949bcca36c9e) ([#6950](https://github.com/yt-dlp/yt-dlp/issues/6950)) by [bashonly](https://github.com/bashonly)
- [Support cookies and short URLs](https://github.com/yt-dlp/yt-dlp/commit/7a6f6f24592a8065376f11a58e44878807732cf6) ([#6825](https://github.com/yt-dlp/yt-dlp/issues/6825)) by [bashonly](https://github.com/bashonly)
- **rokfin**: [Re-construct manifest url](https://github.com/yt-dlp/yt-dlp/commit/7a6c8a0807941dd24fbf0d6172e811884f98e027) ([#6507](https://github.com/yt-dlp/yt-dlp/issues/6507)) by [vampirefrog](https://github.com/vampirefrog)
- **rottentomatoes**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2d306c03d6f2697fcbabb7da35aa62cc078359d3) ([#6844](https://github.com/yt-dlp/yt-dlp/issues/6844)) by [JChris246](https://github.com/JChris246)
- **rozhlas**
- [Extract manifest formats](https://github.com/yt-dlp/yt-dlp/commit/e4cf7741f9302b3faa092962f2895b55cb3d89bb) ([#6590](https://github.com/yt-dlp/yt-dlp/issues/6590)) by [bashonly](https://github.com/bashonly)
- `MujRozhlas`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2b801fea59628d5c873e06a0727fbf2051bbd1f) ([#7129](https://github.com/yt-dlp/yt-dlp/issues/7129)) by [stanoarn](https://github.com/stanoarn)
- **rtvc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/9b30cd3dfce83c2f0201b28a7a3ef44ab9722664) ([#6578](https://github.com/yt-dlp/yt-dlp/issues/6578)) by [elyse0](https://github.com/elyse0)
- **rumble**
- [Detect timeline format](https://github.com/yt-dlp/yt-dlp/commit/78bc1868ff3352108ab2911033d1ac67a55f151e) by [pukkandan](https://github.com/pukkandan)
- [Fix videos without quality selection](https://github.com/yt-dlp/yt-dlp/commit/6994afc030d2a786d8032075ed71a14d7eac5a4f) by [pukkandan](https://github.com/pukkandan)
- **sbs**: [Overhaul extractor for new API](https://github.com/yt-dlp/yt-dlp/commit/6a765f135ccb654861336ea27a2c1c24ea8e286f) ([#6839](https://github.com/yt-dlp/yt-dlp/issues/6839)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [vidiot720](https://github.com/vidiot720)
- **shemaroome**: [Pass `stream_key` header to downloader](https://github.com/yt-dlp/yt-dlp/commit/7bc92517463f5766e9d9b92c3823b5cf403c0e3d) ([#7224](https://github.com/yt-dlp/yt-dlp/issues/7224)) by [bashonly](https://github.com/bashonly)
- **sonyliv**: [Fix login with token](https://github.com/yt-dlp/yt-dlp/commit/4815d35c191e7d375b94492a6486dd2ba43a8954) ([#7223](https://github.com/yt-dlp/yt-dlp/issues/7223)) by [bashonly](https://github.com/bashonly)
- **stageplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e5265dc6517478e589ee3c1ff0cb19bdf4e35ce1) ([#6838](https://github.com/yt-dlp/yt-dlp/issues/6838)) by [bashonly](https://github.com/bashonly)
- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f9213f8a2d7ba46b912afe1dd3ce6bb700a33d72) ([#7306](https://github.com/yt-dlp/yt-dlp/issues/7306)) by [foreignBlade](https://github.com/foreignBlade)
- **substack**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/12037d8b0a578fcc78a5c8f98964e48ee6060e25) ([#7218](https://github.com/yt-dlp/yt-dlp/issues/7218)) by [bashonly](https://github.com/bashonly)
- **sverigesradio**: [Support slug URLs](https://github.com/yt-dlp/yt-dlp/commit/5ee9a7d6e18ceea956e831994cf11c423979354f) ([#7220](https://github.com/yt-dlp/yt-dlp/issues/7220)) by [bashonly](https://github.com/bashonly)
- **tagesschau**: [Fix single audio urls](https://github.com/yt-dlp/yt-dlp/commit/af7585c824a1e405bd8afa46d87b4be322edc93c) ([#6626](https://github.com/yt-dlp/yt-dlp/issues/6626)) by [flashdagger](https://github.com/flashdagger)
- **teamcoco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c459d45dd4d417fb80a52e1a04e607776a44baa4) ([#6437](https://github.com/yt-dlp/yt-dlp/issues/6437)) by [bashonly](https://github.com/bashonly)
- **telecaribe**: [Expand livestream support](https://github.com/yt-dlp/yt-dlp/commit/69b2f838d3d3e37dc17367ef64d978db1bea45cf) ([#6601](https://github.com/yt-dlp/yt-dlp/issues/6601)) by [bashonly](https://github.com/bashonly)
- **tencent**: [Fix fatal metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/971d901d129403e875a04dd92109507a03fbc070) ([#7219](https://github.com/yt-dlp/yt-dlp/issues/7219)) by [bashonly](https://github.com/bashonly)
- **thesun**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0181b9a1b31db3fde943f7cd3fe9662f23bff292) ([#6522](https://github.com/yt-dlp/yt-dlp/issues/6522)) by [hatienl0i261299](https://github.com/hatienl0i261299)
- **tiktok**
- [Extract 1080p adaptive formats](https://github.com/yt-dlp/yt-dlp/commit/c2a1bdb00931969193f2a31ea27b9c66a07aaec2) ([#7228](https://github.com/yt-dlp/yt-dlp/issues/7228)) by [bashonly](https://github.com/bashonly)
- [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/925936908a3c3ee0e508621db14696b9f6a8b563) ([#6777](https://github.com/yt-dlp/yt-dlp/issues/6777)) by [bashonly](https://github.com/bashonly)
- [Fix mp3 formats](https://github.com/yt-dlp/yt-dlp/commit/8ceb07e870424c219dced8f4348729553f05c5cc) ([#6615](https://github.com/yt-dlp/yt-dlp/issues/6615)) by [bashonly](https://github.com/bashonly)
- [Fix resolution extraction](https://github.com/yt-dlp/yt-dlp/commit/ab6057ec80aa75db6303b8206916d00c376c622c) ([#7237](https://github.com/yt-dlp/yt-dlp/issues/7237)) by [puc9](https://github.com/puc9)
- [Improve `TikTokLive` extractor](https://github.com/yt-dlp/yt-dlp/commit/216bcb66d7dce0762767d751dad10650cb57da9d) ([#6520](https://github.com/yt-dlp/yt-dlp/issues/6520)) by [bashonly](https://github.com/bashonly)
- **triller**: [Support short URLs, detect removed videos](https://github.com/yt-dlp/yt-dlp/commit/33b737bedf8383c0d00d4e1d06a5273dcdfdb756) ([#6636](https://github.com/yt-dlp/yt-dlp/issues/6636)) by [bashonly](https://github.com/bashonly)
- **tv4**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/125ffaa1737dd04716f2f6fbb0595ad3eb7a4b1c) ([#5649](https://github.com/yt-dlp/yt-dlp/issues/5649)) by [dirkf](https://github.com/dirkf), [TxI5](https://github.com/TxI5)
- **tvp**: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/0c7ce146e4d2a84e656d78f6857952bfd25ab389) ([#6989](https://github.com/yt-dlp/yt-dlp/issues/6989)) by [selfisekai](https://github.com/selfisekai)
- **tvplay**: [Remove outdated domains](https://github.com/yt-dlp/yt-dlp/commit/937264419f9bf375d5656785ae6e53282587c15d) ([#7106](https://github.com/yt-dlp/yt-dlp/issues/7106)) by [ivanskodje](https://github.com/ivanskodje)
- **twitch**
- [Extract original size thumbnail](https://github.com/yt-dlp/yt-dlp/commit/80b732b7a9585b2a61e456dc0d2d014a439cbaee) ([#6629](https://github.com/yt-dlp/yt-dlp/issues/6629)) by [JC-Chung](https://github.com/JC-Chung)
- [Fix `is_live`](https://github.com/yt-dlp/yt-dlp/commit/0551511b45f7847f40e4314aa9e624e80d086539) ([#6500](https://github.com/yt-dlp/yt-dlp/issues/6500)) by [elyse0](https://github.com/elyse0)
- [Support mobile clips](https://github.com/yt-dlp/yt-dlp/commit/02312c03cf53eb1da24c9ad022ee79af26060733) ([#6699](https://github.com/yt-dlp/yt-dlp/issues/6699)) by [bepvte](https://github.com/bepvte)
- [Update `_CLIENT_ID` and add extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/01231feb142e80828985aabdec04ac608e3d43e2) ([#7200](https://github.com/yt-dlp/yt-dlp/issues/7200)) by [bashonly](https://github.com/bashonly)
- vod: [Support links from schedule tab](https://github.com/yt-dlp/yt-dlp/commit/dbce5afa6bb61f6272ade613f2e9a3d66b88c7ea) ([#7071](https://github.com/yt-dlp/yt-dlp/issues/7071)) by [falbrechtskirchinger](https://github.com/falbrechtskirchinger)
- **twitter**
- [Add login support](https://github.com/yt-dlp/yt-dlp/commit/d1795f4a6af99c976c9d3ea2dabe5cf4f8965d3c) ([#7258](https://github.com/yt-dlp/yt-dlp/issues/7258)) by [bashonly](https://github.com/bashonly)
- [Default to GraphQL, handle auth errors](https://github.com/yt-dlp/yt-dlp/commit/147e62fc584c3ea6fdb09bb7a47905df68553a22) ([#6957](https://github.com/yt-dlp/yt-dlp/issues/6957)) by [bashonly](https://github.com/bashonly)
- spaces: [Add `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/1c16d9df5330819cc79ad588b24aa5b72765c168) ([#7186](https://github.com/yt-dlp/yt-dlp/issues/7186)) by [CeruleanSky](https://github.com/CeruleanSky)
- **urplay**: [Extract all subtitles](https://github.com/yt-dlp/yt-dlp/commit/7bcd4813215ac98daa4949af2ffc677c78307a38) ([#7309](https://github.com/yt-dlp/yt-dlp/issues/7309)) by [hoaluvn](https://github.com/hoaluvn)
- **voot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4f7b11cc1c1cebf598107e00cd7295588ed484da) ([#7227](https://github.com/yt-dlp/yt-dlp/issues/7227)) by [bashonly](https://github.com/bashonly)
- **vrt**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/1a7dcca378e80a387923ee05c250d8ba122441c6) ([#6244](https://github.com/yt-dlp/yt-dlp/issues/6244)) by [bashonly](https://github.com/bashonly), [bergoid](https://github.com/bergoid), [jeroenj](https://github.com/jeroenj)
- **weverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b844a3f8b16500663e7ab6c6ec061cc9b30f71ac) ([#6711](https://github.com/yt-dlp/yt-dlp/issues/6711)) by [bashonly](https://github.com/bashonly) (With fixes in [fd5d93f](https://github.com/yt-dlp/yt-dlp/commit/fd5d93f7040f9776fd541f4e4079dad7d3b3fb4f))
- **wevidi**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1ea15603d852971ed7d92f4de12808b27b3d9370) ([#6868](https://github.com/yt-dlp/yt-dlp/issues/6868)) by [truedread](https://github.com/truedread)
- **weyyak**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6dc00acf0f1f1107a626c21befd1691403e6aeeb) ([#7124](https://github.com/yt-dlp/yt-dlp/issues/7124)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **whyp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2c566ed14101673c651c08c306c30fa5b4010b85) ([#6803](https://github.com/yt-dlp/yt-dlp/issues/6803)) by [CoryTibbettsDev](https://github.com/CoryTibbettsDev)
- **wrestleuniverse**
- [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/c8561c6d03f025268d6d3972abeb47987c8d7cbb) by [bashonly](https://github.com/bashonly)
- [Fix extraction, add login](https://github.com/yt-dlp/yt-dlp/commit/ef8fb7f029b816dfc95600727d84400591a3b5c5) ([#6982](https://github.com/yt-dlp/yt-dlp/issues/6982)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- **wykop**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/aed945e1b9b7d3af2a907e1a12e6508cc81d6a20) ([#6140](https://github.com/yt-dlp/yt-dlp/issues/6140)) by [selfisekai](https://github.com/selfisekai)
- **ximalaya**: [Sort playlist entries](https://github.com/yt-dlp/yt-dlp/commit/8790ea7b2536332777bce68590386b1aa935fac7) ([#7292](https://github.com/yt-dlp/yt-dlp/issues/7292)) by [linsui](https://github.com/linsui)
- **YahooGyaOIE, YahooGyaOPlayerIE**: [Delete extractors due to website closure](https://github.com/yt-dlp/yt-dlp/commit/68be95bd0ca3f76aa63c9812935bd826b3a42e53) ([#6218](https://github.com/yt-dlp/yt-dlp/issues/6218)) by [Lesmiscore](https://github.com/Lesmiscore)
- **yappy**: YappyProfile: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6f69101dc912690338d32e2aab085c32e44eba3f) ([#7346](https://github.com/yt-dlp/yt-dlp/issues/7346)) by [7vlad7](https://github.com/7vlad7)
- **youku**: [Improve error message](https://github.com/yt-dlp/yt-dlp/commit/ef0848abd425dfda6db62baa8d72897eefb0007f) ([#6690](https://github.com/yt-dlp/yt-dlp/issues/6690)) by [carusocr](https://github.com/carusocr)
- **youporn**: [Extract m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/ddae33754ae1f32dd9c64cf895c47d20f6b5f336) by [pukkandan](https://github.com/pukkandan)
- **youtube**
- [Add client name to `format_note` when `-v`](https://github.com/yt-dlp/yt-dlp/commit/c795c39f27244cbce846067891827e4847036441) ([#6254](https://github.com/yt-dlp/yt-dlp/issues/6254)) by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
- [Add extractor-arg `include_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/86cb922118b236306310a72657f70426c20e28bb) by [pukkandan](https://github.com/pukkandan)
- [Bypass throttling for `-f17`](https://github.com/yt-dlp/yt-dlp/commit/c9abebb851e6188cb34b9eb744c1863dd46af919) by [pukkandan](https://github.com/pukkandan)
- [Construct fragment list lazily](https://github.com/yt-dlp/yt-dlp/commit/2a23d92d9ec44a0168079e38bcf3d383e5c4c7bb) by [pukkandan](https://github.com/pukkandan) (With fixes in [e389d17](https://github.com/yt-dlp/yt-dlp/commit/e389d172b6f42e4f332ae679dc48543fb7b9b61d))
- [Define strict uploader metadata mapping](https://github.com/yt-dlp/yt-dlp/commit/7666b93604b97e9ada981c6b04ccf5605dd1bd44) ([#6384](https://github.com/yt-dlp/yt-dlp/issues/6384)) by [coletdjnz](https://github.com/coletdjnz)
- [Determine audio language using automatic captions](https://github.com/yt-dlp/yt-dlp/commit/ff9b0e071ffae5543cc309e6f9e647ac51e5846e) by [pukkandan](https://github.com/pukkandan)
- [Extract `channel_is_verified`](https://github.com/yt-dlp/yt-dlp/commit/8213ce28a485e200f6a7e1af1434a987c8e702bd) ([#7213](https://github.com/yt-dlp/yt-dlp/issues/7213)) by [coletdjnz](https://github.com/coletdjnz)
- [Extract `heatmap` data](https://github.com/yt-dlp/yt-dlp/commit/5caf30dbc34f10b0be60676fece635b5c59f0d72) ([#7100](https://github.com/yt-dlp/yt-dlp/issues/7100)) by [tntmod54321](https://github.com/tntmod54321)
- [Extract more metadata for comments](https://github.com/yt-dlp/yt-dlp/commit/c35448b7b14113b35c4415dbfbf488c4731f006f) ([#7179](https://github.com/yt-dlp/yt-dlp/issues/7179)) by [coletdjnz](https://github.com/coletdjnz)
- [Extract uploader metadata for feed/playlist items](https://github.com/yt-dlp/yt-dlp/commit/93e12ed76ef49252dc6869b59d21d0777e5e11af) by [coletdjnz](https://github.com/coletdjnz)
- [Fix comment loop detection for pinned comments](https://github.com/yt-dlp/yt-dlp/commit/141a8dff98874a426d7fbe772e0a8421bb42656f) ([#6714](https://github.com/yt-dlp/yt-dlp/issues/6714)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix continuation loop with no comments](https://github.com/yt-dlp/yt-dlp/commit/18f8fba7c89a87f99cc3313a1795848867e84fff) ([#7148](https://github.com/yt-dlp/yt-dlp/issues/7148)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix parsing `comment_count`](https://github.com/yt-dlp/yt-dlp/commit/071670cbeaa01ddf2cc20a95ae6da25f8f086431) ([#6523](https://github.com/yt-dlp/yt-dlp/issues/6523)) by [nick-cd](https://github.com/nick-cd)
- [Handle incomplete initial data from watch page](https://github.com/yt-dlp/yt-dlp/commit/607510b9f2f67bfe7d33d74031a5c1fe22a24862) ([#6510](https://github.com/yt-dlp/yt-dlp/issues/6510)) by [coletdjnz](https://github.com/coletdjnz)
- [Ignore wrong fps of some formats](https://github.com/yt-dlp/yt-dlp/commit/97afb093d4cbe5df889145afa5f9ede4535e93e4) by [pukkandan](https://github.com/pukkandan)
- [Misc cleanup](https://github.com/yt-dlp/yt-dlp/commit/14a14335b280766fbf5a469ae26836d6c1fe450a) by [coletdjnz](https://github.com/coletdjnz)
- [Prioritize premium formats](https://github.com/yt-dlp/yt-dlp/commit/51a07b0dca4c079d58311c19b6d1c097c24bb021) by [pukkandan](https://github.com/pukkandan)
- [Revert default formats to `https`](https://github.com/yt-dlp/yt-dlp/commit/c6786ff3baaf72a5baa4d56d34058e54cbcf8ceb) by [pukkandan](https://github.com/pukkandan)
- [Support podcasts and releases tabs](https://github.com/yt-dlp/yt-dlp/commit/447afb9eaa65bc677e3245c83e53a8e69c174a3c) by [coletdjnz](https://github.com/coletdjnz)
- [Support shorter relative time format](https://github.com/yt-dlp/yt-dlp/commit/2fb35f6004c7625f0dd493da4a5abf0690f7777c) ([#7191](https://github.com/yt-dlp/yt-dlp/issues/7191)) by [coletdjnz](https://github.com/coletdjnz)
- music_search_url: [Extract title](https://github.com/yt-dlp/yt-dlp/commit/69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2) ([#7102](https://github.com/yt-dlp/yt-dlp/issues/7102)) by [kangalio](https://github.com/kangalio)
- **zaiko**
- [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/345b4c0aedd9d19898ce00d5cef35fe0d277a052) ([#7254](https://github.com/yt-dlp/yt-dlp/issues/7254)) by [c-basalt](https://github.com/c-basalt)
- ZaikoETicket: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5cc09c004bd5edbbada9b041c08a720cadc4f4df) ([#7347](https://github.com/yt-dlp/yt-dlp/issues/7347)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **zdf**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ee0ed0338df328cd986f97315c8162b5a151476d) by [bashonly](https://github.com/bashonly)
- **zee5**: [Fix extraction of new content](https://github.com/yt-dlp/yt-dlp/commit/9d7fde89a40360396f0baa2ee8bf507f92108b32) ([#7280](https://github.com/yt-dlp/yt-dlp/issues/7280)) by [bashonly](https://github.com/bashonly)
- **zingmp3**: [Fix and improve extractors](https://github.com/yt-dlp/yt-dlp/commit/17d7ca84ea723c20668bd9bfa938be7ea0e64f6b) ([#6367](https://github.com/yt-dlp/yt-dlp/issues/6367)) by [hatienl0i261299](https://github.com/hatienl0i261299)
- **zoom**
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/79c77e85b70ae3b9942d5a88c14d021a9bd24222) ([#6741](https://github.com/yt-dlp/yt-dlp/issues/6741)) by [shreyasminocha](https://github.com/shreyasminocha)
- [Fix share URL extraction](https://github.com/yt-dlp/yt-dlp/commit/90c1f5120694105496a6ad9e3ecfc6c25de6cae1) ([#6789](https://github.com/yt-dlp/yt-dlp/issues/6789)) by [bashonly](https://github.com/bashonly)
#### Downloader changes
- **curl**: [Fix progress reporting](https://github.com/yt-dlp/yt-dlp/commit/66aeaac9aa30b5959069ba84e53a5508232deb38) by [pukkandan](https://github.com/pukkandan)
- **fragment**: [Do not sleep between fragments](https://github.com/yt-dlp/yt-dlp/commit/424f3bf03305088df6e01d62f7311be8601ad3f4) by [pukkandan](https://github.com/pukkandan)
#### Postprocessor changes
- [Fix chapters if duration is not extracted](https://github.com/yt-dlp/yt-dlp/commit/01ddec7e661bf90dc4c34e6924eb9d7629886cef) ([#6037](https://github.com/yt-dlp/yt-dlp/issues/6037)) by [bashonly](https://github.com/bashonly)
- [Print newline for `--progress-template`](https://github.com/yt-dlp/yt-dlp/commit/13ff78095372fd98900a32572cf817994c07ccb5) by [pukkandan](https://github.com/pukkandan)
- **EmbedThumbnail, FFmpegMetadata**: [Fix error on attaching thumbnails and info json for mkv/mka](https://github.com/yt-dlp/yt-dlp/commit/0f0875ed555514f32522a0f30554fb08825d5124) ([#6647](https://github.com/yt-dlp/yt-dlp/issues/6647)) by [Lesmiscore](https://github.com/Lesmiscore)
- **FFmpegFixupM3u8PP**: [Check audio codec before fixup](https://github.com/yt-dlp/yt-dlp/commit/3f7e2bd80e3c5d8a1682f20a1b245fcd974f295d) ([#6778](https://github.com/yt-dlp/yt-dlp/issues/6778)) by [bashonly](https://github.com/bashonly)
- **FixupDuplicateMoov**: [Fix bug in triggering](https://github.com/yt-dlp/yt-dlp/commit/26010b5cec50193b98ad7845d1d77450f9f14c2b) by [pukkandan](https://github.com/pukkandan)
#### Misc. changes
- [Add automatic duplicate issue detection](https://github.com/yt-dlp/yt-dlp/commit/15b2d3db1d40b0437fca79d8874d392aa54b3cdd) by [pukkandan](https://github.com/pukkandan)
- **build**
- [Fix macOS target](https://github.com/yt-dlp/yt-dlp/commit/44a79958f0b596ee71e1eb25f158610aada29d1b) by [Grub4K](https://github.com/Grub4K)
- [Implement build verification using `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/b73193c99aa23b135732408a5fcf655c68d731c6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Pin `pyinstaller` version for MacOS](https://github.com/yt-dlp/yt-dlp/commit/427a8fafbb0e18c28d0ed7960be838d7b26b88d3) by [pukkandan](https://github.com/pukkandan)
- [Various build workflow improvements](https://github.com/yt-dlp/yt-dlp/commit/c4efa0aefec8daef1de62fd1693f13edf3c8b03c) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- **cleanup**
- Miscellaneous
- [6f2287c](https://github.com/yt-dlp/yt-dlp/commit/6f2287cb18cbfb27518f068d868fa9390fee78ad) by [pukkandan](https://github.com/pukkandan)
- [ad54c91](https://github.com/yt-dlp/yt-dlp/commit/ad54c9130e793ce433bf9da334fa80df9f3aee58) by [freezboltz](https://github.com/freezboltz), [mikf](https://github.com/mikf), [pukkandan](https://github.com/pukkandan)
- **cleanup, utils**: [Split into submodules](https://github.com/yt-dlp/yt-dlp/commit/69bec6730ec9d724bcedeab199d9d684d61423ba) ([#7090](https://github.com/yt-dlp/yt-dlp/issues/7090)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
- **cli_to_api**: [Add script](https://github.com/yt-dlp/yt-dlp/commit/46f1370e9af6f8af8762f67e27e5acb8f0c48a47) by [pukkandan](https://github.com/pukkandan)
- **devscripts**: `make_changelog`: [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/23c39a4beadee382060bb47fdaa21316ca707d38) by [Grub4K](https://github.com/Grub4K)
- **docs**: [Misc improvements](https://github.com/yt-dlp/yt-dlp/commit/c8bc203fbf3bb09914e53f0833eed622ab7edbb9) by [pukkandan](https://github.com/pukkandan)
### 2023.03.04
#### Extractor changes

View file

@ -49,7 +49,7 @@
* [Extractor Options](#extractor-options)
* [CONFIGURATION](#configuration)
* [Configuration file encoding](#configuration-file-encoding)
* [Authentication with .netrc file](#authentication-with-netrc-file)
* [Authentication with netrc](#authentication-with-netrc)
* [Notes about environment variables](#notes-about-environment-variables)
* [OUTPUT TEMPLATE](#output-template)
* [Output template examples](#output-template-examples)
@ -76,7 +76,7 @@
# NEW FEATURES
* Merged with **youtube-dl v2021.12.17+ [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e)** ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/yt-dlp/yt-dlp/commit/42f2d4) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
@ -152,19 +152,20 @@ ### Differences in default behavior
* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
* Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead
* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
* Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this
* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is
* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
For ease of use, a few more compat options are available:
* `--compat-options all`: Use all compat options (Do NOT use)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect`
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress`. Use this to enable all future compat options
# INSTALLATION
@ -250,7 +251,7 @@ #### Misc
```
<!-- MANPAGE: END EXCLUDED SECTION -->
**Note**: The manpages, shell completion files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
## DEPENDENCIES
Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
@ -609,12 +610,14 @@ ## Download Options:
--no-hls-use-mpegts Do not use the mpegts container for HLS
videos. This is default when not downloading
live streams
--download-sections REGEX Download only chapters whose title matches
the given regular expression. Time ranges
prefixed by a "*" can also be used in place
of chapters to download the specified range.
Needs ffmpeg. This option can be used
multiple times to download multiple
--download-sections REGEX Download only chapters that match the
regular expression. A "*" prefix denotes
time-range instead of chapter. Negative
timestamps are calculated from the end.
"*from-url" can be used to download between
the "start_time" and "end_time" extracted
from the URL. Needs ffmpeg. This option can
be used multiple times to download multiple
sections, e.g. --download-sections
"*10:15-inf" --download-sections "intro"
--downloader [PROTO:]NAME Name or path of the external downloader to
@ -698,9 +701,8 @@ ## Filesystem Options:
--write-description etc. (default)
--no-write-playlist-metafiles Do not write playlist metadata when using
--write-info-json, --write-description etc.
--clean-info-json Remove some private fields such as filenames
from the infojson. Note that it could still
contain some personal information (default)
--clean-info-json Remove some internal metadata such as
filenames from the infojson (default)
--no-clean-info-json Write all fields to the infojson
--write-comments Retrieve video comments to be placed in the
infojson. The comments are fetched even
@ -728,7 +730,7 @@ ## Filesystem Options:
By default, all containers of the most
recently accessed profile are used.
Currently supported keyrings are: basictext,
gnomekeyring, kwallet
gnomekeyring, kwallet, kwallet5, kwallet6
--no-cookies-from-browser Do not load cookies from browser (default)
--cache-dir DIR Location in the filesystem where yt-dlp can
store some downloaded information (such as
@ -909,6 +911,8 @@ ## Authentication Options:
--netrc-location PATH Location of .netrc authentication data;
either the path or its containing directory.
Defaults to ~/.netrc
--netrc-cmd NETRC_CMD Command to execute to get the credentials
for an extractor.
--video-password PASSWORD Video password (vimeo, youku)
--ap-mso MSO Adobe Pass multiple-system operator (TV
provider) identifier, use --ap-list-mso for
@ -1038,13 +1042,10 @@ ## Post-Processing Options:
that of --use-postprocessor (default:
after_move). Same syntax as the output
template can be used to pass any field as
arguments to the command. After download, an
additional field "filepath" that contains
the final path of the downloaded file is
also available, and if no fields are passed,
%(filepath,_filename|)q is appended to the
end of the command. This option can be used
multiple times
arguments to the command. If no fields are
passed, %(filepath,_filename|)q is appended
to the end of the command. This option can
be used multiple times
--no-exec Remove any previously defined --exec
--convert-subs FORMAT Convert the subtitles to another format
(currently supported: ass, lrc, srt, vtt)
@ -1202,7 +1203,7 @@ ### Configuration file encoding
If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.
### Authentication with `.netrc` file
### Authentication with netrc
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
```
@ -1222,6 +1223,14 @@ ### Authentication with `.netrc` file
The default location of the .netrc file is `~` (see below).
As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter: the command must output the credentials in the netrc format and return `0` on success; any other exit status is treated as an error. `{}` in the command will be replaced by the name of the extractor, so that the correct credentials can be selected.
E.g. to use an encrypted `.netrc` file stored as `.authinfo.gpg`:
```
yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc
```
### Notes about environment variables
* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but they are always shown as `${VARIABLE}` in this documentation
* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location`
@ -1292,6 +1301,7 @@ # OUTPUT TEMPLATE
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
- `channel_follower_count` (numeric): Number of followers of the channel
- `channel_is_verified` (boolean): Whether the channel is verified on the platform
- `location` (string): Physical location where the video was filmed
- `duration` (numeric): Length of the video in seconds
- `duration_string` (string): Length of the video (HH:mm:ss)
@ -1376,7 +1386,10 @@ # OUTPUT TEMPLATE
- `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
- `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`
Available only after the video is downloaded (`post_process`/`after_move`):
- `filepath`: Actual path of downloaded video file
Available only in `--sponsorblock-chapter-title`:
- `start_time` (numeric): Start time of the chapter in seconds
@ -1422,7 +1435,7 @@ # Download YouTube playlist videos in separate directories according to their up
$ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
# Prefix playlist index with " - " separator, but only if it is available
$ yt-dlp -o '%(playlist_index|)s%(playlist_index& - |)s%(title)s.%(ext)s' BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"
$ yt-dlp -o "%(playlist_index&{} - |)s%(title)s.%(ext)s" BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"
# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
$ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"
@ -1769,7 +1782,7 @@ # Do not set any "synopsis" in the video metadata
$ yt-dlp --parse-metadata ":(?P<meta_synopsis>)"
# Remove "formats" field from the infojson by setting it to an empty string
$ yt-dlp --parse-metadata ":(?P<formats>)" -j
$ yt-dlp --parse-metadata "video::(?P<formats>)" --write-info-json
# Replace all spaces and "_" in title and uploader with a `-`
$ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
@ -1787,7 +1800,7 @@ # EXTRACTOR ARGUMENTS
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
@ -1843,11 +1856,14 @@ #### rokfinchannel
#### twitter
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
### wrestleuniverse
#### wrestleuniverse
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
#### twitchstream (Twitch)
* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitchstream:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko`
#### twitch
* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitch:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko`
#### nhkradirulive (NHK らじる★らじる LIVE)
* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo`
**Note**: These options may be changed/removed in the future without concern for backward compatibility

View file

@ -8,5 +8,53 @@
"action": "add",
"when": "776d1c3f0c9b00399896dd2e40e78e9a43218109",
"short": "[priority] **YouTube throttling fixes!**"
},
{
"action": "remove",
"when": "2e023649ea4e11151545a34dc1360c114981a236"
},
{
"action": "add",
"when": "01aba2519a0884ef17d5f85608dbd2a455577147",
"short": "[priority] YouTube: Improved throttling and signature fixes"
},
{
"action": "change",
"when": "c86e433c35fe5da6cb29f3539eef97497f84ed38",
"short": "[extractor/niconico:series] Fix extraction (#6898)",
"authors": ["sqrtNOT"]
},
{
"action": "change",
"when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2",
"short": "[extractor/youtube:music_search_url] Extract title (#7102)",
"authors": ["kangalio"]
},
{
"action": "change",
"when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb",
"short": "Add option `--color` (#6904)",
"authors": ["Grub4K"]
},
{
"action": "change",
"when": "7b37e8b23691613f331bd4ebc9d639dd6f93c972",
"short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL"
},
{
"action": "change",
"when": "1e75d97db21152acc764b30a688e516f04b8a142",
"short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such"
},
{
"action": "change",
"when": "f2ff0f6f1914b82d4a51681a72cc0828115dcb4a",
"short": "[extractor/motherless] Add gallery support, fix groups (#7211)",
"authors": ["rexlambert22", "Ti4eeT4e"]
},
{
"action": "change",
"when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700",
"short": "[misc] Revert \"Add automatic duplicate issue detection\""
}
]

View file

@ -19,11 +19,11 @@ def parse_patched_options(opts):
'extract_flat': False,
'concat_playlist': 'never',
})
yt_dlp.options.__dict__['create_parser'] = lambda: patched_parser
yt_dlp.options.create_parser = lambda: patched_parser
try:
return yt_dlp.parse_options(opts)
finally:
yt_dlp.options.__dict__['create_parser'] = create_parser
yt_dlp.options.create_parser = create_parser
default_opts = parse_patched_options([]).ydl_opts

View file

@ -6,6 +6,7 @@
age_restricted,
bug_reports_message,
classproperty,
variadic,
write_string,
)

View file

@ -44,7 +44,7 @@ def commit_lookup(cls):
return {
name: group
for group, names in {
cls.PRIORITY: {''},
cls.PRIORITY: {'priority'},
cls.CORE: {
'aes',
'cache',
@ -68,7 +68,7 @@ def commit_lookup(cls):
'misc',
'test',
},
cls.EXTRACTOR: {'extractor', 'extractors'},
cls.EXTRACTOR: {'extractor'},
cls.DOWNLOADER: {'downloader'},
cls.POSTPROCESSOR: {'postprocessor'},
}.items()
@ -196,7 +196,7 @@ def _prepare_cleanup_misc_items(self, items):
for commit_infos in cleanup_misc_items.values():
sorted_items.append(CommitInfo(
'cleanup', ('Miscellaneous',), ', '.join(
self._format_message_link(None, info.commit.hash)
self._format_message_link(None, info.commit.hash).strip()
for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
[], Commit(None, '', commit_infos[0].commit.authors), []))
@ -205,10 +205,10 @@ def _prepare_cleanup_misc_items(self, items):
def format_single_change(self, info):
message = self._format_message_link(info.message, info.commit.hash)
if info.issues:
message = f'{message} ({self._format_issues(info.issues)})'
message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
if info.commit.authors:
message = f'{message} by {self._format_authors(info.commit.authors)}'
message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
if info.fixes:
fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
@ -217,14 +217,16 @@ def format_single_change(self, info):
if authors != info.commit.authors:
fix_message = f'{fix_message} by {self._format_authors(authors)}'
message = f'{message} (With fixes in {fix_message})'
message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
return message
return message[:-1]
def _format_message_link(self, message, hash):
assert message or hash, 'Improperly defined commit message or override'
message = message if message else hash[:HASH_LENGTH]
return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message
if not hash:
return f'{message}\n'
return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
def _format_issues(self, issues):
return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
@ -323,7 +325,7 @@ def apply_overrides(self, overrides):
logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
continue
override_hash = override.get('hash')
override_hash = override.get('hash') or when
if override['action'] == 'add':
commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
logger.info(f'ADD {commit}')
@ -337,7 +339,7 @@ def apply_overrides(self, overrides):
elif override['action'] == 'change':
if override_hash not in self._commits:
continue
commit = Commit(override_hash, override['short'], override['authors'])
commit = Commit(override_hash, override['short'], override.get('authors') or [])
logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
self._commits[commit.hash] = commit
@ -348,7 +350,7 @@ def groups(self):
for commit in self:
upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
if upstream_re:
commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'
commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
match = self.MESSAGE_RE.fullmatch(commit.short)
if not match:
@ -394,10 +396,10 @@ def details_from_prefix(prefix):
return CommitGroup.CORE, None, ()
prefix, _, details = prefix.partition('/')
prefix = prefix.strip().lower()
prefix = prefix.strip()
details = details.strip()
group = CommitGroup.get(prefix)
group = CommitGroup.get(prefix.lower())
if group is CommitGroup.PRIORITY:
prefix, _, details = details.partition('/')

View file

@ -150,7 +150,9 @@ # Supported sites
- **bfmtv**
- **bfmtv:article**
- **bfmtv:live**
- **BibelTV**
- **bibeltv:live**: BibelTV live program
- **bibeltv:series**: BibelTV series playlist
- **bibeltv:video**: BibelTV single video
- **Bigflix**
- **Bigo**
- **Bild**: Bild.de
@ -183,12 +185,17 @@ # Supported sites
- **Bloomberg**
- **BokeCC**
- **BongaCams**
- **BooyahClips**
- **BostonGlobe**
- **Box**
- **BoxCastVideo**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk
- **BrainPOP**: [*brainpop*](## "netrc machine")
- **BrainPOPELL**: [*brainpop*](## "netrc machine")
- **BrainPOPEsp**: [*brainpop*](## "netrc machine") BrainPOP Español
- **BrainPOPFr**: [*brainpop*](## "netrc machine") BrainPOP Français
- **BrainPOPIl**: [*brainpop*](## "netrc machine") BrainPOP Hebrew
- **BrainPOPJr**: [*brainpop*](## "netrc machine")
- **BravoTV**
- **Break**
- **BreitBart**
@ -207,6 +214,8 @@ # Supported sites
- **CAM4**
- **Camdemy**
- **CamdemyFolder**
- **CamFMEpisode**
- **CamFMShow**
- **CamModels**
- **Camsoda**
- **CamtasiaEmbed**
@ -214,8 +223,6 @@ # Supported sites
- **CanalAlpha**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
- **Canvas**
- **CanvasEen**: canvas.be and een.be
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
@ -225,8 +232,10 @@ # Supported sites
- **CBSInteractive**
- **CBSLocal**
- **CBSLocalArticle**
- **CBSLocalLive**
- **cbsnews**: CBS News
- **cbsnews:embed**
- **cbsnews:live**: CBS News Livestream
- **cbsnews:livevideo**: CBS News Live Videos
- **cbssports**
- **cbssports:embed**
@ -252,6 +261,7 @@ # Supported sites
- **CiscoLiveSession**
- **ciscowebex**: Cisco Webex
- **CJSW**
- **Clipchamp**
- **cliphunter**
- **Clippit**
- **ClipRs**
@ -271,6 +281,7 @@ # Supported sites
- **CNNIndonesia**
- **ComedyCentral**
- **ComedyCentralTV**
- **ConanClassic**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **CONtv**
- **CookingChannel**
@ -286,7 +297,10 @@ # Supported sites
- **CrooksAndLiars**
- **CrowdBunker**
- **CrowdBunkerChannel**
- **Crtvg**
- **crunchyroll**: [*crunchyroll*](## "netrc machine")
- **crunchyroll:artist**: [*crunchyroll*](## "netrc machine")
- **crunchyroll:music**: [*crunchyroll*](## "netrc machine")
- **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine")
- **CSpan**: C-SPAN
- **CSpanCongress**
@ -301,6 +315,8 @@ # Supported sites
- **CWTV**
- **Cybrary**: [*cybrary*](## "netrc machine")
- **CybraryCourse**: [*cybrary*](## "netrc machine")
- **DacastPlaylist**
- **DacastVOD**
- **Daftsex**
- **DagelijkseKost**: dagelijksekost.een.be
- **DailyMail**
@ -331,6 +347,7 @@ # Supported sites
- **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor
- **DigitallySpeaking**
- **Digiteka**
- **DiscogsReleasePlaylist**
- **Discovery**
- **DiscoveryLife**
- **DiscoveryNetworksDe**
@ -341,6 +358,8 @@ # Supported sites
- **DiscoveryPlusItalyShow**
- **Disney**
- **DIYNetwork**
- **dlf**
- **dlf:corpus**: DLF Multi-feed Archives
- **dlive:stream**
- **dlive:vod**
- **Dotsub**
@ -378,6 +397,7 @@ # Supported sites
- **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine")
- **Einthusan**
- **eitb.tv**
- **ElevenSports**
- **EllenTube**
- **EllenTubePlaylist**
- **EllenTubeVideo**
@ -400,6 +420,7 @@ # Supported sites
- **ESPNArticle**
- **ESPNCricInfo**
- **EsriVideo**
- **EttuTv**
- **Europa**
- **EuroParlWebstream**
- **EuropeanTour**
@ -460,6 +481,7 @@ # Supported sites
- **funimation:page**: [*funimation*](## "netrc machine")
- **funimation:show**: [*funimation*](## "netrc machine")
- **Funk**
- **Funker530**
- **Fusion**
- **Fux**
- **FuyinTV**
@ -493,10 +515,17 @@ # Supported sites
- **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine")
- **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine")
- **Glide**: Glide mobile video messages (glide.me)
- **GlobalCyclingNetworkPlus**
- **GlobalPlayerAudio**
- **GlobalPlayerAudioEpisode**
- **GlobalPlayerLive**
- **GlobalPlayerLivePlaylist**
- **GlobalPlayerVideo**
- **Globo**: [*globo*](## "netrc machine")
- **GloboArticle**
- **glomex**: Glomex videos
- **glomex:embed**: Glomex embedded videos
- **GMANetworkVideo**
- **Go**
- **GoDiscovery**
- **GodTube**
@ -522,7 +551,6 @@ # Supported sites
- **Heise**
- **HellPorno**
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **hetklokhuis**
- **hgtv.com:show**
- **HGTVDe**
@ -535,6 +563,8 @@ # Supported sites
- **hitbox:live**
- **HitRecord**
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
- **HollywoodReporter**
- **HollywoodReporterPlaylist**
- **Holodex**
- **HotNewHipHop**
- **hotstar**
@ -558,6 +588,7 @@ # Supported sites
- **Hypem**
- **Hytale**
- **Icareus**
- **IdolPlus**
- **iflix:episode**
- **IflixSeries**
- **ign.com**
@ -600,9 +631,9 @@ # Supported sites
- **ivi:compilation**: ivi.ru compilations
- **ivideon**: Ivideon TV
- **IVXPlayer**
- **Iwara**
- **iwara:playlist**
- **iwara:user**
- **iwara**: [*iwara*](## "netrc machine")
- **iwara:playlist**: [*iwara*](## "netrc machine")
- **iwara:user**: [*iwara*](## "netrc machine")
- **Ixigua**
- **Izlesene**
- **Jable**
@ -612,6 +643,7 @@ # Supported sites
- **JeuxVideo**
- **Joj**
- **Jove**
- **JStream**
- **JWPlatform**
- **Kakao**
- **Kaltura**
@ -678,8 +710,6 @@ # Supported sites
- **limelight**
- **limelight:channel**
- **limelight:channel_list**
- **LineLive**
- **LineLiveChannel**
- **LinkedIn**: [*linkedin*](## "netrc machine")
- **linkedin:learning**: [*linkedin*](## "netrc machine")
- **linkedin:learning:course**: [*linkedin*](## "netrc machine")
@ -785,6 +815,7 @@ # Supported sites
- **MonsterSirenHypergryphMusic**
- **Morningstar**: morningstar.com
- **Motherless**
- **MotherlessGallery**
- **MotherlessGroup**
- **Motorsport**: motorsport.com
- **MotorTrend**
@ -806,6 +837,7 @@ # Supported sites
- **mtvservices:embedded**
- **MTVUutisetArticle**
- **MuenchenTV**: münchen.tv
- **MujRozhlas**
- **Murrtube**
- **MurrtubeUser**: Murrtube user profile
- **MuseScore**
@ -827,6 +859,7 @@ # Supported sites
- **MyVideoGe**
- **MyVidster**
- **MyviEmbed**
- **Mzaalo**
- **n-tv.de**
- **N1Info:article**
- **N1InfoAsset**
@ -858,6 +891,7 @@ # Supported sites
- **Nebula**: [*watchnebula*](## "netrc machine")
- **nebula:channel**: [*watchnebula*](## "netrc machine")
- **nebula:subscriptions**: [*watchnebula*](## "netrc machine")
- **NekoHacker**
- **NerdCubedFeed**
- **netease:album**: 网易云音乐 - 专辑
- **netease:djradio**: 网易云音乐 - 电台
@ -893,6 +927,9 @@ # Supported sites
- **NhkForSchoolBangumi**
- **NhkForSchoolProgramList**
- **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
- **NhkRadioNewsPage**
- **NhkRadiru**: NHK らじる (Radiru/Rajiru)
- **NhkRadiruLive**
- **NhkVod**
- **NhkVodProgram**
- **nhl.com**
@ -903,6 +940,7 @@ # Supported sites
- **nicknight**
- **niconico**: [*niconico*](## "netrc machine") ニコニコ動画
- **niconico:history**: NicoNico user history or likes. Requires cookies.
- **niconico:live**: ニコニコ生放送
- **niconico:playlist**
- **niconico:series**
- **niconico:tag**: NicoNico video tag URLs
@ -947,6 +985,7 @@ # Supported sites
- **NRKTVSeries**
- **NRLTV**
- **ntv.ru**
- **NubilesPorn**: [*nubiles-porn*](## "netrc machine")
- **Nuvid**
- **NYTimes**
- **NYTimesArticle**
@ -987,6 +1026,7 @@ # Supported sites
- **OsnatelTVLive**: [*osnateltv*](## "netrc machine")
- **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine")
- **OutsideTV**
- **OwnCloud**
- **PacktPub**: [*packtpub*](## "netrc machine")
- **PacktPubCourse**
- **PalcoMP3:artist**
@ -999,6 +1039,7 @@ # Supported sites
- **ParamountNetwork**
- **ParamountPlus**
- **ParamountPlusSeries**
- **ParamountPressExpress**
- **Parler**: Posts on parler.com
- **parliamentlive.tv**: UK parliament videos
- **Parlview**
@ -1016,6 +1057,7 @@ # Supported sites
- **PerformGroup**
- **periscope**: Periscope
- **periscope:user**: Periscope user videos
- **PGATour**
- **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de**
- **Photobucket**
@ -1057,7 +1099,6 @@ # Supported sites
- **PolskieRadio**
- **polskieradio:audition**
- **polskieradio:category**
- **polskieradio:kierowcow**
- **polskieradio:legacy**
- **polskieradio:player**
- **polskieradio:podcast**
@ -1122,6 +1163,7 @@ # Supported sites
- **radlive:channel**
- **radlive:season**
- **Rai**
- **RaiCultura**
- **RaiNews**
- **RaiPlay**
- **RaiPlayLive**
@ -1142,11 +1184,12 @@ # Supported sites
- **RCTIPlusSeries**
- **RCTIPlusTV**
- **RDS**: RDS.ca
- **Recurbate**
- **RedBull**
- **RedBullEmbed**
- **RedBullTV**
- **RedBullTVRrnContent**
- **Reddit**
- **Reddit**: [*reddit*](## "netrc machine")
- **RedGifs**
- **RedGifsSearch**: Redgifs search
- **RedGifsUser**: Redgifs user
@ -1157,6 +1200,7 @@ # Supported sites
- **Restudy**
- **Reuters**
- **ReverbNation**
- **RheinMainTV**
- **RICE**
- **RMCDecouverte**
- **RockstarGames**
@ -1186,6 +1230,9 @@ # Supported sites
- **RTP**
- **RTRFM**
- **RTS**: RTS.ch
- **RTVCKaltura**
- **RTVCPlay**
- **RTVCPlayEmbed**
- **rtve.es:alacarta**: RTVE a la carta
- **rtve.es:audio**: RTVE audio
- **rtve.es:infantil**: RTVE infantil
@ -1239,6 +1286,7 @@ # Supported sites
- **SCTE**: [*scte*](## "netrc machine")
- **SCTECourse**: [*scte*](## "netrc machine")
- **Seeker**
- **SenalColombiaLive**
- **SenateGov**
- **SenateISVP**
- **SendtoNews**
@ -1315,6 +1363,7 @@ # Supported sites
- **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
- **stanfordoc**: Stanford Open ClassRoom
- **StarTrek**
- **startv**
@ -1427,6 +1476,7 @@ # Supported sites
- **TrailerAddict**: (**Currently broken**)
- **TravelChannel**
- **Triller**: [*triller*](## "netrc machine")
- **TrillerShort**
- **TrillerUser**: [*triller*](## "netrc machine")
- **Trilulilu**
- **Trovo**
@ -1499,12 +1549,12 @@ # Supported sites
- **TwitchVideos**: [*twitch*](## "netrc machine")
- **TwitchVideosClips**: [*twitch*](## "netrc machine")
- **TwitchVideosCollections**: [*twitch*](## "netrc machine")
- **twitter**
- **twitter:amplify**
- **twitter:broadcast**
- **twitter**: [*twitter*](## "netrc machine")
- **twitter:amplify**: [*twitter*](## "netrc machine")
- **twitter:broadcast**: [*twitter*](## "netrc machine")
- **twitter:card**
- **twitter:shortener**
- **twitter:spaces**
- **twitter:shortener**: [*twitter*](## "netrc machine")
- **twitter:spaces**: [*twitter*](## "netrc machine")
- **Txxx**
- **udemy**: [*udemy*](## "netrc machine")
- **udemy:course**: [*udemy*](## "netrc machine")
@ -1541,7 +1591,6 @@ # Supported sites
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **vhx:embed**: [*vimeo*](## "netrc machine")
- **Viafree**
- **vice**
- **vice:article**
- **vice:show**
@ -1607,8 +1656,8 @@ # Supported sites
- **voicy**
- **voicy:channel**
- **VolejTV**
- **Voot**
- **VootSeries**
- **Voot**: [*voot*](## "netrc machine")
- **VootSeries**: [*voot*](## "netrc machine")
- **VoxMedia**
- **VoxMediaVolume**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
@ -1616,7 +1665,7 @@ # Supported sites
- **vqq:video**
- **Vrak**
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- **VrtNU**: [*vrtnu*](## "netrc machine") VrtNU.be
- **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX
- **vrv**: [*vrv*](## "netrc machine")
- **vrv:series**
- **VShare**
@ -1660,7 +1709,16 @@ # Supported sites
- **WeiqiTV**: WQTV
- **wetv:episode**
- **WeTvSeries**
- **Weverse**: [*weverse*](## "netrc machine")
- **WeverseLive**: [*weverse*](## "netrc machine")
- **WeverseLiveTab**: [*weverse*](## "netrc machine")
- **WeverseMedia**: [*weverse*](## "netrc machine")
- **WeverseMediaTab**: [*weverse*](## "netrc machine")
- **WeverseMoment**: [*weverse*](## "netrc machine")
- **WeVidi**
- **Weyyak**
- **whowatch**
- **Whyp**
- **wikimedia.org**
- **Willow**
- **WimTV**
@ -1674,13 +1732,17 @@ # Supported sites
- **WorldStarHipHop**
- **wppilot**
- **wppilot:channels**
- **WrestleUniversePPV**
- **WrestleUniverseVOD**
- **WrestleUniversePPV**: [*wrestleuniverse*](## "netrc machine")
- **WrestleUniverseVOD**: [*wrestleuniverse*](## "netrc machine")
- **WSJ**: Wall Street Journal
- **WSJArticle**
- **WWE**
- **wyborcza:video**
- **WyborczaPodcast**
- **wykop:dig**
- **wykop:dig:comment**
- **wykop:post**
- **wykop:post:comment**
- **Xanimu**
- **XBef**
- **XboxClips**
@ -1701,8 +1763,6 @@ # Supported sites
- **xvideos:quickies**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
- **yahoo:gyao**
- **yahoo:gyao:player**
- **yahoo:japannews**: Yahoo! Japan News
- **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
@ -1714,6 +1774,7 @@ # Supported sites
- **YandexVideoPreview**
- **YapFiles**
- **Yappy**
- **YappyProfile**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
- **YleAreena**
@ -1746,6 +1807,8 @@ # Supported sites
- **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
- **YoutubeLivestreamEmbed**: YouTube livestream embeds
- **YoutubeYtBe**: youtu.be
- **Zaiko**
- **ZaikoETicket**
- **Zapiks**
- **Zattoo**: [*zattoo*](## "netrc machine")
- **ZattooLive**: [*zattoo*](## "netrc machine")
@ -1763,6 +1826,7 @@ # Supported sites
- **zingmp3:album**
- **zingmp3:chart-home**
- **zingmp3:chart-music-video**
- **zingmp3:hub**
- **zingmp3:user**
- **zingmp3:week-chart**
- **zoom**

View file

@ -917,8 +917,6 @@ def test_parse_m3u8_formats(self):
'acodec': 'mp4a.40.2',
'video_ext': 'mp4',
'audio_ext': 'none',
'vbr': 263.851,
'abr': 0,
}, {
'format_id': '577',
'format_index': None,
@ -936,8 +934,6 @@ def test_parse_m3u8_formats(self):
'acodec': 'mp4a.40.2',
'video_ext': 'mp4',
'audio_ext': 'none',
'vbr': 577.61,
'abr': 0,
}, {
'format_id': '915',
'format_index': None,
@ -955,8 +951,6 @@ def test_parse_m3u8_formats(self):
'acodec': 'mp4a.40.2',
'video_ext': 'mp4',
'audio_ext': 'none',
'vbr': 915.905,
'abr': 0,
}, {
'format_id': '1030',
'format_index': None,
@ -974,8 +968,6 @@ def test_parse_m3u8_formats(self):
'acodec': 'mp4a.40.2',
'video_ext': 'mp4',
'audio_ext': 'none',
'vbr': 1030.138,
'abr': 0,
}, {
'format_id': '1924',
'format_index': None,
@ -993,8 +985,6 @@ def test_parse_m3u8_formats(self):
'acodec': 'mp4a.40.2',
'video_ext': 'mp4',
'audio_ext': 'none',
'vbr': 1924.009,
'abr': 0,
}],
{
'en': [{

View file

@ -630,6 +630,7 @@ def test_add_extra_info(self):
self.assertEqual(test_dict['playlist'], 'funny videos')
outtmpl_info = {
'id': '1234',
'id': '1234',
'ext': 'mp4',
'width': None,
@ -668,7 +669,7 @@ def test(tmpl, expected, *, info=None, **params):
for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):
if callable(expect):
self.assertTrue(expect(got), f'Wrong {name} from {tmpl}')
else:
elif expect is not None:
self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')
# Side-effects
@ -754,20 +755,23 @@ def expect_same_infodict(out):
test('%(ext)c', 'm')
test('%(id)d %(id)r', "1234 '1234'")
test('%(id)r %(height)r', "'1234' 1080")
test('%(title5)a %(height)a', (R"'\xe1\xe9\xed \U0001d400' 1080", None))
test('%(ext)s-%(ext|def)d', 'mp4-def')
test('%(width|0)04d', '0000')
test('%(width|0)04d', '0')
test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none')
FORMATS = self.outtmpl_info['formats']
sanitize = lambda x: x.replace(':', '').replace('"', "").replace('\n', ' ')
# Custom type casting
test('%(formats.:.id)l', 'id 1, id 2, id 3')
test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3'))
test('%(ext)l', 'mp4')
test('%(formats.:.id) 18l', ' id 1, id 2, id 3')
test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS))))
test('%(formats)#j', (json.dumps(FORMATS, indent=4), sanitize(json.dumps(FORMATS, indent=4))))
test('%(formats)j', (json.dumps(FORMATS), None))
test('%(formats)#j', (
json.dumps(FORMATS, indent=4),
json.dumps(FORMATS, indent=4).replace(':', '').replace('"', "").replace('\n', ' ')
))
test('%(title5).3B', 'á')
test('%(title5)U', 'áéí 𝐀')
test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
@ -792,8 +796,8 @@ def expect_same_infodict(out):
test('%(title|%)s %(title|%%)s', '% %%')
test('%(id+1-height+3)05d', '00158')
test('%(width+100)05d', 'NA')
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0]))))
test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0]))))
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None))
test('%(formats.0)r', (repr(FORMATS[0]), None))
test('%(height.0)03d', '001')
test('%(-height.0)04d', '-001')
test('%(formats.-1.id)s', FORMATS[-1]['id'])
@ -805,7 +809,7 @@ def expect_same_infodict(out):
out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}
if 'height' in f else {'id': f['id']}
for f in FORMATS])
test('%(formats.:.{id,height.:2})j', (out, sanitize(out)))
test('%(formats.:.{id,height.:2})j', (out, None))
test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))
test('%(.{id,title})j', ('{"id": "1234"}', '{id 1234}'))

View file

@ -12,19 +12,62 @@
from yt_dlp.jsinterp import JS_Undefined, JSInterpreter
class NaN:
    """Sentinel expected value: passing this class to _test() requests a
    math.isnan() check on the result, since NaN never compares equal to itself."""
    pass
class TestJSInterpreter(unittest.TestCase):
def _test(self, code, ret, func='f', args=()):
self.assertEqual(JSInterpreter(code).call_function(func, *args), ret)
def _test(self, jsi_or_code, expected, func='f', args=()):
    """Call `func` with `args` and compare the result against `expected`.

    `jsi_or_code` may be either JS source text (a fresh interpreter is
    built from it) or a pre-constructed JSInterpreter instance.  The `NaN`
    sentinel class is used as `expected` to request a math.isnan() check,
    because NaN never compares equal to itself.
    """
    interpreter = JSInterpreter(jsi_or_code) if isinstance(jsi_or_code, str) else jsi_or_code
    result = interpreter.call_function(func, *args)
    if expected is NaN:
        self.assertTrue(math.isnan(result), f'{result} is not NaN')
    else:
        self.assertEqual(result, expected)
def test_basic(self):
jsi = JSInterpreter('function f(){;}')
self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
self.assertEqual(jsi.call_function('f'), None)
self._test(jsi, None)
self._test('function f(){return 42;}', 42)
self._test('function f(){42}', None)
self._test('var f = function(){return 42;}', 42)
def test_add(self):
    """`+`: plain numbers add; undefined yields NaN; null coerces to 0."""
    for code, expected in (
        ('function f(){return 42 + 7;}', 49),
        ('function f(){return 42 + undefined;}', NaN),
        ('function f(){return 42 + null;}', 42),
    ):
        self._test(code, expected)
def test_sub(self):
    """`-`: plain subtraction; undefined yields NaN; null coerces to 0."""
    for code, expected in (
        ('function f(){return 42 - 7;}', 35),
        ('function f(){return 42 - undefined;}', NaN),
        ('function f(){return 42 - null;}', 42),
    ):
        self._test(code, expected)
def test_mul(self):
    """`*`: plain multiplication; undefined yields NaN; null coerces to 0."""
    for code, expected in (
        ('function f(){return 42 * 7;}', 294),
        ('function f(){return 42 * undefined;}', NaN),
        ('function f(){return 42 * null;}', 0),
    ):
        self._test(code, expected)
def test_div(self):
    """`/`: 0/0 and an undefined dividend give NaN; n/0 gives Infinity."""
    jsi = JSInterpreter('function f(a, b){return a / b;}')
    for call_args, expected in (
        ((0, 0), NaN),
        ((JS_Undefined, 1), NaN),
        ((2, 0), float('inf')),
        ((0, 3), 0),
    ):
        self._test(jsi, expected, args=call_args)
def test_mod(self):
    """`%`: ordinary modulo; `% 0` and an undefined operand give NaN."""
    for code, expected in (
        ('function f(){return 42 % 7;}', 0),
        ('function f(){return 42 % 0;}', NaN),
        ('function f(){return 42 % undefined;}', NaN),
    ):
        self._test(code, expected)
def test_exp(self):
    """`**`: numeric exponentiation; undefined on either side gives NaN,
    except that `** null` (exponent 0) evaluates to 1."""
    for code, expected in (
        ('function f(){return 42 ** 2;}', 1764),
        ('function f(){return 42 ** undefined;}', NaN),
        ('function f(){return 42 ** null;}', 1),
        ('function f(){return undefined ** 42;}', NaN),
    ):
        self._test(code, expected)
def test_calc(self):
self._test('function f(a){return 2*a+1;}', 7, args=[3])
@ -95,16 +138,15 @@ def test_precedence(self):
''', [20, 20, 30, 40, 50])
    def test_builtins(self):
        # The NaN builtin must surface as a Python float NaN.
        jsi = JSInterpreter('function f() { return NaN }')
        self.assertTrue(math.isnan(jsi.call_function('f')))
        self._test('function f() { return NaN }', NaN)
    def test_date(self):
        # `new Date(...) - 0` coerces the date to a millisecond epoch offset;
        # several textual date formats must parse to the same instant.
        self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000)
        jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }')
        self.assertEqual(jsi.call_function('f', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
        self.assertEqual(jsi.call_function('f', '12/31/1969 18:01:26 MDT'), 86000)  # m/d/y
        self.assertEqual(jsi.call_function('f', '1 January 1970 00:00:00 UTC'), 0)
        self._test(jsi, 86000, args=['Wednesday 31 December 1969 18:01:26 MDT'])
        self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])  # m/d/y
        self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
def test_call(self):
jsi = JSInterpreter('''
@ -112,8 +154,8 @@ def test_call(self):
function y(a) { return x() + (a?a:0); }
function z() { return y(3); }
''')
self.assertEqual(jsi.call_function('z'), 5)
self.assertEqual(jsi.call_function('y'), 2)
self._test(jsi, 5, func='z')
self._test(jsi, 2, func='y')
def test_if(self):
self._test('''
@ -160,9 +202,9 @@ def test_switch(self):
default:x=0;
} return x }
''')
self.assertEqual(jsi.call_function('f', 1), 7)
self.assertEqual(jsi.call_function('f', 3), 6)
self.assertEqual(jsi.call_function('f', 5), 0)
self._test(jsi, 7, args=[1])
self._test(jsi, 6, args=[3])
self._test(jsi, 0, args=[5])
def test_switch_default(self):
jsi = JSInterpreter('''
@ -175,9 +217,9 @@ def test_switch_default(self):
case 1: x+=1;
} return x }
''')
self.assertEqual(jsi.call_function('f', 1), 2)
self.assertEqual(jsi.call_function('f', 5), 11)
self.assertEqual(jsi.call_function('f', 9), 14)
self._test(jsi, 2, args=[1])
self._test(jsi, 11, args=[5])
self._test(jsi, 14, args=[9])
    def test_try(self):
        # When the try block does not throw, its return value wins over catch.
        self._test('function f() { try{return 10} catch(e){return 5} }', 10)
@ -305,12 +347,12 @@ def test_replace(self):
    def test_char_code_at(self):
        # charCodeAt: char codes of "test" by index; out-of-range index yields
        # None (JS NaN), and a non-numeric index coerces to 0.
        jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}')
        self.assertEqual(jsi.call_function('f', 0), 116)
        self.assertEqual(jsi.call_function('f', 1), 101)
        self.assertEqual(jsi.call_function('f', 2), 115)
        self.assertEqual(jsi.call_function('f', 3), 116)
        self.assertEqual(jsi.call_function('f', 4), None)
        self.assertEqual(jsi.call_function('f', 'not_a_number'), 116)
        self._test(jsi, 116, args=[0])
        self._test(jsi, 101, args=[1])
        self._test(jsi, 115, args=[2])
        self._test(jsi, 116, args=[3])
        self._test(jsi, None, args=[4])
        self._test(jsi, 116, args=['not_a_number'])
    def test_bitwise_operators_overflow(self):
        # JS shift operators work on 32-bit signed ints, so the result wraps.
        self._test('function f(){return -524999584 << 5}', 379882496)

View file

@ -62,7 +62,12 @@
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
)
),
(
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
),
]
_NSIG_TESTS = [
@ -150,6 +155,14 @@
'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js',
'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w',
),
(
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js',
'1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A',
),
(
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
),
]
@ -226,7 +239,7 @@ def n_sig(jscode, sig_input):
make_sig_test = t_factory(
'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$'))
for test_spec in _SIG_TESTS:
make_sig_test(*test_spec)

View file

@ -195,6 +195,7 @@ class YoutubeDL:
ap_password: Multiple-system operator account password.
usenetrc: Use netrc for authentication instead.
netrc_location: Location of the netrc file. Defaults to ~/.netrc.
netrc_cmd: Use a shell command to get credentials
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
@ -263,7 +264,7 @@ class YoutubeDL:
consoletitle: Display progress in console window's titlebar.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
clean_infojson: Remove private fields from the infojson
clean_infojson: Remove internal metadata from the infojson
getcomments: Extract video comments. This will not be written to disk
unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
@ -1291,17 +1292,17 @@ def create_key(outer_mobj):
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
fmt = f'0{field_size_compat_map[key]:d}d'
if value is None:
value = default
elif replacement is not None:
if None not in (value, replacement):
try:
value = replacement_formatter.format(replacement, value)
except ValueError:
value = na
value, default = None, na
flags = outer_mobj.group('conversion') or ''
str_fmt = f'{fmt[:-1]}s'
if fmt[-1] == 'l': # list
if value is None:
value, fmt = default, 's'
elif fmt[-1] == 'l': # list
delim = '\n' if '#' in flags else ', '
value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
elif fmt[-1] == 'j': # json
@ -1332,17 +1333,19 @@ def create_key(outer_mobj):
value = str(value)[0]
else:
fmt = str_fmt
elif fmt[-1] not in 'rs': # numeric
elif fmt[-1] not in 'rsa': # numeric
value = float_or_none(value)
if value is None:
value, fmt = default, 's'
if sanitize:
# If value is an object, sanitize might convert it to a string
# So we convert it to repr first
if fmt[-1] == 'r':
# If value is an object, sanitize might convert it to a string
# So we convert it to repr first
value, fmt = repr(value), str_fmt
if fmt[-1] in 'csr':
elif fmt[-1] == 'a':
value, fmt = ascii(value), str_fmt
if fmt[-1] in 'csra':
value = sanitizer(initial_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
@ -1408,7 +1411,7 @@ def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
def _match_entry(self, info_dict, incomplete=False, silent=False):
"""Returns None if the file should be downloaded"""
_type = info_dict.get('_type', 'video')
_type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
assert incomplete or _type == 'video', 'Only video result can be considered complete'
video_title = info_dict.get('title', info_dict.get('id', 'entry'))
@ -1906,7 +1909,7 @@ def __process_playlist(self, ie_result, download):
continue
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
if not lazy and 'playlist-index' in self.params['compat_opts']:
playlist_index = ie_result['requested_entries'][i]
entry_copy = collections.ChainMap(entry, {
@ -2668,7 +2671,8 @@ def is_wellformed(f):
format['dynamic_range'] = 'SDR'
if format.get('aspect_ratio') is None:
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
if (info_dict.get('duration') and format.get('tbr')
if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
and info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
@ -2807,11 +2811,13 @@ def to_screen(*msg):
new_info.update(fmt)
offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
end_time = offset + min(chapter.get('end_time', duration), duration)
# duration may not be accurate. So allow deviations <1sec
if end_time == float('inf') or end_time > offset + duration + 1:
end_time = None
if chapter or offset:
new_info.update({
'section_start': offset + chapter.get('start_time', 0),
# duration may not be accurate. So allow deviations <1sec
'section_end': end_time if end_time <= offset + duration + 1 else None,
'section_end': end_time,
'section_title': chapter.get('title'),
'section_number': chapter.get('index'),
})
@ -2963,8 +2969,7 @@ def print_field(field, actual_field=None, optional=False):
print_field('url', 'urls')
print_field('thumbnail', optional=True)
print_field('description', optional=True)
if filename:
print_field('filename')
print_field('filename')
if self.params.get('forceduration') and info_copy.get('duration') is not None:
self.to_stdout(formatSeconds(info_copy['duration']))
print_field('format')
@ -3188,7 +3193,6 @@ def existing_video_file(*filepaths):
return
if info_dict.get('requested_formats') is not None:
requested_formats = info_dict['requested_formats']
old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None:
if (info_dict['ext'] == 'webm'
@ -3215,19 +3219,22 @@ def correct_ext(filename, ext=new_ext):
full_filename = correct_ext(full_filename)
temp_filename = correct_ext(temp_filename)
dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False
# NOTE: Copy so that original format dicts are not modified
info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
merger = FFmpegMergerPP(self)
downloaded = []
if dl_filename is not None:
self.report_file_already_downloaded(dl_filename)
elif fd:
for f in requested_formats if fd != FFmpegFD else []:
for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
f['filepath'] = fname = prepend_extension(
correct_ext(temp_filename, info_dict['ext']),
'f%s' % f['format_id'], info_dict['ext'])
downloaded.append(fname)
info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
success, real_download = self.dl(temp_filename, info_dict)
info_dict['__real_download'] = real_download
else:
@ -3251,7 +3258,7 @@ def correct_ext(filename, ext=new_ext):
f'You have requested downloading multiple formats to stdout {reason}. '
'The formats will be streamed one after the other')
fname = temp_filename
for f in requested_formats:
for f in info_dict['requested_formats']:
new_info = dict(info_dict)
del new_info['requested_formats']
new_info.update(f)
@ -3707,8 +3714,11 @@ def simplified_codec(f, field):
format_field(f, 'fps', '\t%d', func=round),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
format_field(f, 'audio_channels', '\t%s'),
delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
delim, (
format_field(f, 'filesize', ' \t%s', func=format_bytes)
or format_field(f, 'filesize_approx', '\t%s', func=format_bytes)
or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
format_field(f, 'tbr', '\t%dk', func=round),
shorten_protocol_name(f.get('protocol', '')),
delim,
@ -4112,8 +4122,11 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename
except network_exceptions as err:
if isinstance(err, urllib.error.HTTPError) and err.code == 404:
self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
else:
self.report_warning(f'Unable to download {thumb_display_id}: {err}')
thumbnails.pop(idx)
self.report_warning(f'Unable to download {thumb_display_id}: {err}')
if ret and not write_all:
break
return ret

View file

@ -189,8 +189,8 @@ def validate_minmax(min_val, max_val, min_name, max_name=None):
raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"')
# Usernames and passwords
validate(not opts.usenetrc or (opts.username is None and opts.password is None),
'.netrc', msg='using {name} conflicts with giving username/password')
validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
msg='{name}, netrc command and username/password are mutually exclusive options')
validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
validate(opts.ap_password is None or opts.ap_username is not None,
'TV Provider account username', msg='{name} missing')
@ -321,41 +321,56 @@ def validate_outtmpl(tmpl, msg):
opts.skip_download = None
del opts.outtmpl['default']
def parse_chapters(name, value):
chapters, ranges = [], []
def parse_chapters(name, value, advanced=False):
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
TIMESTAMP_RE = r'''(?x)(?:
(?P<start_sign>-?)(?P<start>[^-]+)
)?\s*-\s*(?:
(?P<end_sign>-?)(?P<end>[^-]+)
)?'''
current_time = time.time()
chapters, ranges, from_url = [], [], False
for regex in value or []:
if regex.startswith('*'):
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
if None in (dur or [None]):
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
ranges.append(dur)
if advanced and regex == '*from-url':
from_url = True
continue
elif regex.startswith('#'):
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(r'(-?[^-]+)\s*-\s*(-?[^-]+)?', range_)
if not mobj:
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "#start-end"')
start_section = parse_timestamp(mobj.group(1) or '0')
end_section = parse_timestamp(mobj.group(2) or 'inf')
if start_section is None or end_section is None:
raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "#start-end"')
ranges.append((current_time + start_section, current_time + end_section))
elif not regex.startswith('*') or not regex.startswith('#'):
try:
chapters.append(re.compile(regex))
except re.error as err:
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
continue
try:
chapters.append(re.compile(regex))
except re.error as err:
raise ValueError(f'invalid {name} regex "{regex}" - {err}')
return chapters, ranges
opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
for range_ in map(str.strip, regex[1:].split(',')):
mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
dur = mobj and [parse_timestamp(mobj.group('start') or '0'),
parse_timestamp(mobj.group('end') or 'inf')]
signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
err = None
if None in (dur or [None]):
err = 'Must be of the form "*start-end"'
elif not advanced and any(signs):
err = 'Negative timestamps are not allowed'
elif regex.startswith('*'):
dur[0] *= -1 if signs[0] else 1
dur[1] *= -1 if signs[1] else 1
if dur[1] == float('-inf'):
err = '"-inf" is not a valid end'
elif regex.startswith('#'):
dur[0] = dur[0] * (-1 if signs[0] else 1) + current_time
dur[1] = dur[1] * (-1 if signs[1] else 1) + current_time
if dur[1] == float('-inf'):
err = '"-inf" is not a valid end'
if err:
raise ValueError(f'invalid {name} time range "{regex}". {err}')
ranges.append(dur)
return chapters, ranges, from_url
opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
# Cookies from browser
if opts.cookiesfrombrowser:
@ -757,6 +772,7 @@ def parse_options(argv=None):
return ParsedOptions(parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
'netrc_cmd': opts.netrc_cmd,
'username': opts.username,
'password': opts.password,
'twofactor': opts.twofactor,

5
yt_dlp/casefold.py Normal file
View file

@ -0,0 +1,5 @@
import warnings
warnings.warn(DeprecationWarning(f'{__name__} is deprecated'))
casefold = str.casefold

View file

@ -705,11 +705,11 @@ class _LinuxKeyring(Enum):
https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
SelectedLinuxBackend
"""
KWALLET4 = auto() # this value is just called KWALLET in the chromium source but it is for KDE4 only
KWALLET = auto() # KDE4
KWALLET5 = auto()
KWALLET6 = auto()
GNOME_KEYRING = auto()
BASIC_TEXT = auto()
GNOMEKEYRING = auto()
BASICTEXT = auto()
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
@ -803,7 +803,7 @@ def _choose_linux_keyring(logger):
desktop_environment = _get_linux_desktop_environment(os.environ, logger)
logger.debug(f'detected desktop environment: {desktop_environment.name}')
if desktop_environment == _LinuxDesktopEnvironment.KDE4:
linux_keyring = _LinuxKeyring.KWALLET4
linux_keyring = _LinuxKeyring.KWALLET
elif desktop_environment == _LinuxDesktopEnvironment.KDE5:
linux_keyring = _LinuxKeyring.KWALLET5
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
@ -811,9 +811,9 @@ def _choose_linux_keyring(logger):
elif desktop_environment in (
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
):
linux_keyring = _LinuxKeyring.BASIC_TEXT
linux_keyring = _LinuxKeyring.BASICTEXT
else:
linux_keyring = _LinuxKeyring.GNOME_KEYRING
linux_keyring = _LinuxKeyring.GNOMEKEYRING
return linux_keyring
@ -828,7 +828,7 @@ def _get_kwallet_network_wallet(keyring, logger):
"""
default_wallet = 'kdewallet'
try:
if keyring == _LinuxKeyring.KWALLET4:
if keyring == _LinuxKeyring.KWALLET:
service_name = 'org.kde.kwalletd'
wallet_path = '/modules/kwalletd'
elif keyring == _LinuxKeyring.KWALLET5:
@ -929,11 +929,11 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
logger.debug(f'Chosen keyring: {keyring.name}')
if keyring in (_LinuxKeyring.KWALLET4, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
return _get_kwallet_password(browser_keyring_name, keyring, logger)
elif keyring == _LinuxKeyring.GNOME_KEYRING:
elif keyring == _LinuxKeyring.GNOMEKEYRING:
return _get_gnome_keyring_password(browser_keyring_name, logger)
elif keyring == _LinuxKeyring.BASIC_TEXT:
elif keyring == _LinuxKeyring.BASICTEXT:
# when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
return None
assert False, f'Unknown keyring {keyring}'
@ -1326,3 +1326,7 @@ def get_cookie_header(self, url):
cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
self.add_cookie_header(cookie_req)
return cookie_req.get_header('Cookie')
def clear(self, *args, **kwargs):
with contextlib.suppress(KeyError):
return super().clear(*args, **kwargs)

View file

@ -49,7 +49,6 @@ class FileDownloader:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for expected network errors.
Default is 0 for API, but 10 for CLI

View file

@ -173,6 +173,9 @@ def _prepare_frag_download(self, ctx):
**self.params,
'noprogress': True,
'test': False,
'sleep_interval': 0,
'max_sleep_interval': 0,
'sleep_interval_subtitles': 0,
})
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'

View file

@ -7,9 +7,9 @@
from .external import FFmpegFD
from ..utils import (
DownloadError,
str_or_none,
sanitized_Request,
WebSocketsWrapper,
sanitized_Request,
str_or_none,
try_get,
)

View file

@ -497,6 +497,7 @@
DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
DiscoveryPlusIndiaShowIE,
GlobalCyclingNetworkPlusIE,
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@ -520,6 +521,7 @@
DeuxMNewsIE
)
from .digitalconcerthall import DigitalConcertHallIE
from .discogs import DiscogsReleasePlaylistIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@ -578,6 +580,7 @@
ESPNCricInfoIE,
)
from .esri import EsriVideoIE
from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
@ -664,6 +667,7 @@
FunimationShowIE,
)
from .funk import FunkIE
from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
@ -1116,7 +1120,8 @@
from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
MotherlessGroupIE
MotherlessGroupIE,
MotherlessGalleryIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
@ -1257,6 +1262,7 @@
NhkForSchoolProgramListIE,
NhkRadioNewsPageIE,
NhkRadiruIE,
NhkRadiruLiveIE,
)
from .nhl import NHLIE
from .nick import (
@ -1611,6 +1617,7 @@
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
@ -1625,6 +1632,7 @@
from .rozhlas import (
RozhlasIE,
RozhlasVltavaIE,
MujRozhlasIE,
)
from .rte import RteIE, RteRadioIE
from .rtlnl import (
@ -2422,7 +2430,10 @@
ZenYandexChannelIE,
)
from .yapfiles import YapFilesIE
from .yappy import YappyIE
from .yappy import (
YappyIE,
YappyProfileIE,
)
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
@ -2440,6 +2451,10 @@
from .youporn import YouPornIE
from .yourporn import YourPornIE
from .yourupload import YourUploadIE
from .zaiko import (
ZaikoIE,
ZaikoETicketIE,
)
from .zapiks import ZapiksIE
from .zattoo import (
BBVTVIE,

View file

@ -40,28 +40,33 @@ def _call_api(self, path, video_id, query=None):
class ACastIE(ACastBaseIE):
IE_NAME = 'acast'
_VALID_URL = r'''(?x)
_VALID_URL = r'''(?x:
https?://
(?:
(?:(?:embed|www)\.)?acast\.com/|
play\.acast\.com/s/
)
(?P<channel>[^/]+)/(?P<id>[^/#?]+)
'''
(?P<channel>[^/]+)/(?P<id>[^/#?"]+)
)'''
_EMBED_REGEX = [rf'(?x)<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
'title': '2. Raggarmordet - Röster ur det förflutna',
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
'description': 'md5:013959207e05011ad14a222cf22278cc',
'timestamp': 1477346700,
'upload_date': '20161024',
'duration': 2766,
'creator': 'Anton Berg & Martin Johnson',
'creator': 'Third Ear Studio',
'series': 'Spår',
'episode': '2. Raggarmordet - Röster ur det förflutna',
'thumbnail': 'https://assets.pippa.io/shows/616ebe1886d7b1398620b943/616ebe33c7e6e70013cae7da.jpg',
'episode_number': 2,
'display_id': '2.raggarmordet-rosterurdetforflutna',
'season_number': 4,
'season': 'Season 4',
}
}, {
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
@ -73,6 +78,23 @@ class ACastIE(ACastBaseIE):
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://ausi.anu.edu.au/news/democracy-sausage-episode-can-labor-be-long-form-government',
'info_dict': {
'id': '646c68fb21fbf20011e9c651',
'ext': 'mp3',
'creator': 'The Australian National University',
'display_id': 'can-labor-be-a-long-form-government',
'duration': 2618,
'thumbnail': 'https://assets.pippa.io/shows/6113e8578b4903809f16f7e5/1684821529295-515b9520db9ce53275b995eb302f941c.jpeg',
'title': 'Can Labor be a long-form government?',
'episode': 'Can Labor be a long-form government?',
'upload_date': '20230523',
'series': 'Democracy Sausage with Mark Kenny',
'timestamp': 1684826362,
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
}
}]
def _real_extract(self, url):
channel, display_id = self._match_valid_url(url).groups()

View file

@ -76,59 +76,6 @@ class AfreecaTVIE(InfoExtractor):
},
}],
'skip': 'Video is gone',
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793',
'info_dict': {
'id': '18650793',
'ext': 'mp4',
'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': '윈아디',
'uploader_id': 'badkids',
'duration': 107,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652',
'info_dict': {
'id': '10481652',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'duration': 6492,
},
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '20160502_c4c62b9d_174361386_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 3601,
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '20160502_39e739bb_174361386_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
'upload_date': '20160502',
'duration': 2891,
},
}],
'params': {
'skip_download': True,
},
}, {
# non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605',
@ -146,8 +93,8 @@ class AfreecaTVIE(InfoExtractor):
'skip_download': True,
},
}, {
# PARTIAL_ADULT
'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439',
# adult content
'url': 'https://vod.afreecatv.com/player/97267690',
'info_dict': {
'id': '20180327_27901457_202289533_1',
'ext': 'mp4',
@ -161,16 +108,25 @@ class AfreecaTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
'expected_warnings': ['adult content'],
'skip': 'The VOD does not exist',
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030',
'only_matching': True,
}, {
'url': 'http://vod.afreecatv.com/player/15055030',
'only_matching': True,
'url': 'https://vod.afreecatv.com/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
'upload_date': '20230108',
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}]
@staticmethod
@ -223,26 +179,21 @@ def _perform_login(self, username, password):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
if re.search(r'alert\(["\']This video has been deleted', webpage):
raise ExtractorError(
'Video %s has been deleted' % video_id, expected=True)
station_id = self._search_regex(
r'nStationNo\s*=\s*(\d+)', webpage, 'station')
bbs_id = self._search_regex(
r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
video_id = self._search_regex(
r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)
partial_view = False
adult_view = False
for _ in range(2):
data = self._download_json(
'https://api.m.afreecatv.com/station/video/a/view',
video_id, headers={'Referer': url}, data=urlencode_postdata({
'nTitleNo': video_id,
'nApiLevel': 10,
}))['data']
if traverse_obj(data, ('code', {int})) == -6221:
raise ExtractorError('The VOD does not exist', expected=True)
query = {
'nTitleNo': video_id,
'nStationNo': station_id,
'nBbsNo': bbs_id,
'nStationNo': data['station_no'],
'nBbsNo': data['bbs_no'],
}
if partial_view:
query['partialView'] = 'SKIP_ADULT'

View file

@ -5,6 +5,7 @@
int_or_none,
mimetype2ext,
parse_iso8601,
strip_jsonp,
unified_timestamp,
url_or_none,
)
@ -15,7 +16,7 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def _extract_feed_info(self, url):
feed = self._download_json(
url, None, 'Downloading Akamai AMP feed',
'Unable to download Akamai AMP feed')
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
item = feed.get('channel', {}).get('item')
if not item:
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
@ -73,8 +74,10 @@ def get_media_node(name, default=None):
media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
fmts, subs = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),

View file

@ -49,7 +49,7 @@ def _real_extract(self, url):
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
if urlh.status == 403:
if urlh.getcode() == 403:
if stream['code'] == 53004:
self.raise_login_required()
if stream['code'] == 53005:
@ -59,7 +59,7 @@ def _real_extract(self, url):
'This video is protected by a password, use the --video-password option', expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
if urlh.status == 429:
if urlh.getcode() == 429:
self.raise_login_required(
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
method='cookies')

View file

@ -13,9 +13,11 @@
import os
import random
import re
import subprocess
import sys
import time
import types
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
@ -34,6 +36,7 @@
GeoUtils,
HEADRequest,
LenientJSONDecoder,
Popen,
RegexNotFoundError,
RetryManager,
UnsupportedError,
@ -56,6 +59,7 @@
join_nonempty,
js_to_json,
mimetype2ext,
netrc_from_content,
network_exceptions,
orderedSet,
parse_bitrate,
@ -286,6 +290,7 @@ class InfoExtractor:
channel_id: Id of the channel.
channel_url: Full URL to a channel webpage.
channel_follower_count: Number of followers of the channel.
channel_is_verified: Whether the channel is verified on the platform.
location: Physical location where the video was filmed.
subtitles: The available subtitles as a dictionary in the format
{tag: subformats}. "tag" is usually a language code, and
@ -470,8 +475,8 @@ class InfoExtractor:
Subclasses of this should also be added to the list of extractors and
should define a _VALID_URL regexp and, re-define the _real_extract() and
(optionally) _real_initialize() methods.
should define _VALID_URL as a regexp or a Sequence of regexps, and
re-define the _real_extract() and (optionally) _real_initialize() methods.
Subclasses may also override suitable() if necessary, but ensure the function
signature is preserved and that this function imports everything it needs
@ -534,7 +539,7 @@ class InfoExtractor:
_EMBED_REGEX = []
def _login_hint(self, method=NO_DEFAULT, netrc=None):
password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
return {
None: '',
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
@ -561,8 +566,8 @@ def _match_valid_url(cls, url):
# we have cached the regexp for *this* class, whereas getattr would also
# match the superclass
if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url)
cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None)
@classmethod
def suitable(cls, url):
@ -1290,45 +1295,48 @@ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=Tr
return clean_html(res)
def _get_netrc_login_info(self, netrc_machine=None):
username = None
password = None
netrc_machine = netrc_machine or self._NETRC_MACHINE
if self.get_param('usenetrc', False):
try:
netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
if os.path.isdir(netrc_file):
netrc_file = os.path.join(netrc_file, '.netrc')
info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
if info is not None:
username = info[0]
password = info[2]
else:
raise netrc.NetrcParseError(
'No authenticators for %s' % netrc_machine)
except (OSError, netrc.NetrcParseError) as err:
self.report_warning(
'parsing .netrc: %s' % error_to_compat_str(err))
cmd = self.get_param('netrc_cmd')
if cmd:
cmd = cmd.replace('{}', netrc_machine)
self.to_screen(f'Executing command: {cmd}')
stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE)
if ret != 0:
raise OSError(f'Command returned error code {ret}')
info = netrc_from_content(stdout).authenticators(netrc_machine)
return username, password
elif self.get_param('usenetrc', False):
netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
if os.path.isdir(netrc_file):
netrc_file = os.path.join(netrc_file, '.netrc')
info = netrc.netrc(netrc_file).authenticators(netrc_machine)
else:
return None, None
if not info:
raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}')
return info[0], info[2]
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
"""
Get the login info as (username, password)
First look for the manually specified credentials using username_option
and password_option as keys in params dictionary. If no such credentials
available look in the netrc file using the netrc_machine or _NETRC_MACHINE
value.
are available try the netrc_cmd if it is defined or look in the
netrc file using the netrc_machine or _NETRC_MACHINE value.
If there's no info available, return (None, None)
"""
# Attempt to use provided username and password or .netrc data
username = self.get_param(username_option)
if username is not None:
password = self.get_param(password_option)
else:
username, password = self._get_netrc_login_info(netrc_machine)
try:
username, password = self._get_netrc_login_info(netrc_machine)
except (OSError, netrc.NetrcParseError) as err:
self.report_warning(f'Failed to parse .netrc: {err}')
return None, None
return username, password
def _get_tfa_info(self, note='two-factor verification code'):

View file

@ -0,0 +1,35 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import traverse_obj
class DiscogsReleasePlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm',
'info_dict': {
'id': 'release1',
'title': 'Stockholm',
},
'playlist_mincount': 7,
}, {
'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time',
'info_dict': {
'id': 'master113',
'title': 'Moments In Time',
},
'playlist_mincount': 53,
}]
def _real_extract(self, url):
playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
display_id = f'{playlist_type}{playlist_id}'
response = self._download_json(
f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id)
entries = [
self.url_result(video['uri'], YoutubeIE, video_title=video.get('title'))
for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))]
return self.playlist_result(entries, display_id, response.get('title'))

View file

@ -65,6 +65,7 @@ def _download_video_playback_info(self, disco_base, video_id, headers):
return streaming_list
def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
country = self.get_param('geo_bypass_country') or country
geo_countries = [country.upper()]
self._initialize_geo_bypass({
'countries': geo_countries,
@ -1001,3 +1002,39 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
_SHOW_STR = 'show'
_INDEX = 4
_VIDEO_IE = DiscoveryPlusIndiaIE
class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
_TESTS = [{
'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
'info_dict': {
'id': '1397691',
'ext': 'mp4',
'title': 'The Athertons: Mountain Biking\'s Fastest Family',
'description': 'md5:75a81937fcd8b989eec6083a709cd837',
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
'series': 'gcn',
'creator': 'Gcn',
'upload_date': '20210309',
'timestamp': 1615248000,
'duration': 2531.0,
'tags': [],
},
'skip': 'Subscription required',
'params': {'skip_download': 'm3u8'},
}]
_PRODUCT = 'web'
_DISCO_API_PARAMS = {
'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
'realm': 'gcn',
'country': 'us',
}
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
'x-disco-params': f'realm={realm}',
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
'Authorization': self._get_auth(disco_base, display_id, realm),
})

View file

@ -1,13 +1,17 @@
import functools
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
get_elements_html_by_class,
int_or_none,
join_nonempty,
traverse_obj,
unified_strdate,
urlencode_postdata,
)
@ -162,12 +166,13 @@ def _real_extract(self, url):
class DropoutSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
_PAGE_SIZE = 24
_VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)'
_TESTS = [
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
'note': 'Multi-season series with the season in the url',
'playlist_count': 17,
'playlist_count': 24,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
@ -176,7 +181,7 @@ class DropoutSeasonIE(InfoExtractor):
{
'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
'note': 'Multi-season series with the season not in the url',
'playlist_count': 17,
'playlist_count': 24,
'info_dict': {
'id': 'dimension-20-fantasy-high-season-1',
'title': 'Dimension 20 Fantasy High - Season 1'
@ -190,29 +195,30 @@ class DropoutSeasonIE(InfoExtractor):
'id': 'dimension-20-shriek-week-season-1',
'title': 'Dimension 20 Shriek Week - Season 1'
}
},
{
'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3',
'note': 'Multi-season series with season in the url that requires pagination',
'playlist_count': 25,
'info_dict': {
'id': 'breaking-news-no-laugh-newsroom-season-3',
'title': 'Breaking News No Laugh Newsroom - Season 3'
}
}
]
def _fetch_page(self, url, season_id, page):
page += 1
webpage = self._download_webpage(
f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400})
yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj(
get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))]
def _real_extract(self, url):
season_id = self._match_id(url)
season_num = self._match_valid_url(url).group('season') or 1
season_title = season_id.replace('-', ' ').title()
webpage = self._download_webpage(url, season_id)
entries = [
self.url_result(
url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
item, 'item_url'),
ie=DropoutIE.ie_key()
) for item in get_elements_by_class('js-collection-item', webpage)
]
seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
seasons, 'current_season', default='').strip()
return {
'_type': 'playlist',
'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
'title': join_nonempty(season_title, current_season, delim=' - '),
'entries': entries
}
return self.playlist_result(
OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE),
f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')

View file

@ -1,12 +1,17 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
qualities,
)
class DumpertIE(InfoExtractor):
_VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
_VALID_URL = r'''(?x)
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?:
/(?:mediabase|embed|item)/|
(?:/toppers|/latest|/?)\?selectedId=
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
_TESTS = [{
'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
@ -16,6 +21,9 @@ class DumpertIE(InfoExtractor):
'title': 'Ik heb nieuws voor je',
'description': 'Niet schrikken hoor',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 9,
'view_count': int,
'like_count': int,
}
}, {
'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor):
}, {
'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/item/100031688_b317a185',
'info_dict': {
'id': '100031688/b317a185',
'ext': 'mp4',
'title': 'Epic schijnbeweging',
'description': '<p>Die zag je niet eh</p>',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'duration': 12,
'view_count': int,
'like_count': int,
},
'params': {'skip_download': 'm3u8'}
}, {
'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185',
'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
'only_matching': True,
}]
def _real_extract(self, url):
@ -36,18 +66,23 @@ def _real_extract(self, url):
title = item['title']
media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p'])
formats = []
for variant in media.get('variants', []):
uri = variant.get('uri')
if not uri:
continue
version = variant.get('version')
formats.append({
'url': uri,
'format_id': version,
'quality': quality(version),
})
preference = quality(version)
if determine_ext(uri) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
uri, video_id, 'mp4', m3u8_id=version, quality=preference))
else:
formats.append({
'url': uri,
'format_id': version,
'quality': preference,
})
thumbnails = []
stills = item.get('stills') or {}

View file

@ -0,0 +1,60 @@
from .common import InfoExtractor
from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none
class EttuTvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.ettu.tv/en-int/playerpage/1573849',
'md5': '5874b7639a2aa866d1f6c3a4037c7c09',
'info_dict': {
'id': '1573849',
'title': 'Ni Xia Lian - Shao Jieni',
'description': 'ITTF Europe Top 16 Cup',
'timestamp': 1677348600,
'upload_date': '20230225',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'ext': 'mp4',
},
}, {
'url': 'https://www.ettu.tv/en-int/playerpage/1573753',
'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa',
'info_dict': {
'id': '1573753',
'title': 'Qiu Dang - Jorgic Darko',
'description': 'ITTF Europe Top 16 Cup',
'timestamp': 1677423600,
'upload_date': '20230226',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'ext': 'mp4',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
player_settings = self._download_json(
f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={
'language': 'en',
'showTitle': 'true',
'device': 'desktop',
})
stream_response = self._download_json(player_settings['streamAccess'], video_id, data={})
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
stream_response['data']['stream'], video_id, 'mp4')
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(player_settings, {
'title': 'title',
'description': ('metaInformation', 'competition'),
'thumbnail': ('image', {url_or_none}),
'timestamp': ('date', {unified_timestamp}),
'is_live': ('isLivestream', {bool_or_none}),
})
}

View file

@ -7,8 +7,37 @@
class FoxNewsIE(AMPIE):
IE_NAME = 'foxnews'
IE_DESC = 'Fox News and Fox Business Video'
_VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_VALID_URL = r'https?://video\.(?:insider\.)?fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)'
_TESTS = [
{
'url': 'https://video.foxnews.com/v/6320653836112',
'info_dict': {
'id': '6320653836112',
'ext': 'mp4',
'title': 'Tucker Carlson joins \'Gutfeld!\' to discuss his new documentary',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 404,
'upload_date': '20230217',
'description': 'md5:858a8a36f59e9ca897d758855bcdfa02',
'timestamp': 1676611344.0,
},
'params': {'skip_download': 'm3u8'},
},
{
# From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
'info_dict': {
'id': '5099377331001',
'ext': 'mp4',
'title': '82416_censoring',
'description': '82416_censoring',
'upload_date': '20160826',
'timestamp': 1472169708.0,
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 521,
},
'params': {'skip_download': 'm3u8'},
},
{
'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips',
'md5': '32aaded6ba3ef0d1c04e238d01031e5e',
@ -22,6 +51,7 @@ class FoxNewsIE(AMPIE):
'upload_date': '20110503',
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': '404 page',
},
{
'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips',
@ -36,10 +66,7 @@ class FoxNewsIE(AMPIE):
'upload_date': '20141204',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'm3u8 HTTP error 400 in web browser',
},
{
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',
@ -49,11 +76,6 @@ class FoxNewsIE(AMPIE):
'url': 'http://video.foxbusiness.com/v/4442309889001',
'only_matching': True,
},
{
# From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words
'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true',
'only_matching': True,
},
]
@classmethod
@ -67,10 +89,10 @@ def _extract_embed_urls(cls, url, webpage):
yield f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
video_id = self._match_id(url)
info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))
f'https://api.foxnews.com/v3/video-player/{video_id}?callback=uid_{video_id}')
info['id'] = video_id
return info
@ -78,6 +100,19 @@ def _real_extract(self, url):
class FoxNewsVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.foxnews.com/video/6328632286112',
'info_dict': {
'id': '6328632286112',
'ext': 'mp4',
'title': 'Review: 2023 Toyota Prius Prime',
'duration': 155,
'thumbnail': r're:^https://.+\.jpg$',
'timestamp': 1685720177.0,
'upload_date': '20230602',
'description': 'md5:b69aafb125b41c1402e9744f53d6edc4',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.foxnews.com/video/6313058664112',
'info_dict': {
'id': '6313058664112',
@ -89,8 +124,7 @@ class FoxNewsVideoIE(InfoExtractor):
'title': 'Gutfeld! - Thursday, September 29',
'timestamp': 1664527538,
},
'expected_warnings': ['Ignoring subtitle tracks'],
'params': {'skip_download': 'm3u8'},
'skip': '404 page',
}]
def _real_extract(self, url):
@ -104,19 +138,22 @@ class FoxNewsArticleIE(InfoExtractor):
_TESTS = [{
# data-video-id
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': '83d44e1aff1433e7a29a7b537d1700b5',
'url': 'https://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': 'd2dd6ce809cedeefa96460e964821437',
'info_dict': {
'id': '5116295019001',
'ext': 'mp4',
'title': 'Trump and Clinton asked to defend positions on Iraq War',
'description': 'Veterans react on \'The Kelly File\'',
'description': 'Veterans and Fox News host Dana Perino react on \'The Kelly File\' to NBC\'s presidential forum',
'timestamp': 1473301045,
'upload_date': '20160908',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 426,
},
'params': {'skip_download': 'm3u8'},
}, {
# iframe embed
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'url': 'https://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'info_dict': {
'id': '5748266721001',
'ext': 'flv',
@ -127,9 +164,7 @@ class FoxNewsArticleIE(InfoExtractor):
'timestamp': 1520594670,
'upload_date': '20180309',
},
'params': {
'skip_download': True,
},
'skip': '404 page',
}, {
'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words',
'only_matching': True,

View file

@ -0,0 +1,79 @@
from .common import InfoExtractor
from .rumble import RumbleEmbedIE
from .youtube import YoutubeIE
from ..utils import ExtractorError, clean_html, get_element_by_class, strip_or_none
class Funker530IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funker530\.com/video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://funker530.com/video/azov-patrol-caught-in-open-under-automatic-grenade-launcher-fire/',
'md5': '085f50fea27523a388bbc22e123e09c8',
'info_dict': {
'id': 'v2qbmu4',
'ext': 'mp4',
'title': 'Azov Patrol Caught In Open Under Automatic Grenade Launcher Fire',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Funker530',
'channel': 'Funker530',
'channel_url': 'https://rumble.com/c/c-1199543',
'width': 1280,
'height': 720,
'fps': 25,
'duration': 27,
'upload_date': '20230608',
'timestamp': 1686241321,
'live_status': 'not_live',
'description': 'md5:bea2e1f458095414e04b5ac189c2f980',
}
}, {
'url': 'https://funker530.com/video/my-friends-joined-the-russians-civdiv/',
'md5': 'a42c2933391210662e93e867d7124b70',
'info_dict': {
'id': 'k-pk4bOvoac',
'ext': 'mp4',
'view_count': int,
'channel': 'Civ Div',
'comment_count': int,
'channel_follower_count': int,
'thumbnail': 'https://i.ytimg.com/vi/k-pk4bOvoac/maxresdefault.jpg',
'uploader_id': '@CivDiv',
'duration': 357,
'channel_url': 'https://www.youtube.com/channel/UCgsCiwJ88up-YyMHo7hL5-A',
'tags': [],
'uploader_url': 'https://www.youtube.com/@CivDiv',
'channel_id': 'UCgsCiwJ88up-YyMHo7hL5-A',
'like_count': int,
'description': 'md5:aef75ec3f59c07a0e39400f609b24429',
'live_status': 'not_live',
'age_limit': 0,
'uploader': 'Civ Div',
'categories': ['People & Blogs'],
'title': 'My “Friends” joined the Russians.',
'availability': 'public',
'upload_date': '20230608',
'playable_in_embed': True,
'heatmap': 'count:100',
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
if rumble_url:
info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
else:
youtube_url = list(YoutubeIE._extract_embed_urls(url, webpage))
if youtube_url:
info = {'url': youtube_url[0], 'ie_key': YoutubeIE.ie_key()}
if not info:
raise ExtractorError('No videos found on webpage', expected=True)
return {
**info,
'_type': 'url_transparent',
'description': strip_or_none(self._search_regex(
r'(?s)(.+)About the Author', clean_html(get_element_by_class('video-desc-paragraph', webpage)),
'description', default=None))
}

0
yt_dlp/extractor/globalplayer.py Executable file → Normal file
View file

View file

@ -83,7 +83,7 @@ class HotStarIE(HotStarBaseIE):
_VALID_URL = r'''(?x)
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
(?:
(?P<type>movies|sports|episode|(?P<tv>tv))/
(?P<type>movies|sports|episode|(?P<tv>tv|shows))/
(?(tv)(?:[^/?#]+/){2}|[^?#]*)
)?
[^/?#]+/
@ -122,6 +122,25 @@ class HotStarIE(HotStarBaseIE):
'episode': 'Janhvi Targets Suman',
'episode_number': 8,
}
}, {
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
'info_dict': {
'id': '1000282843',
'ext': 'mp4',
'title': 'Anupama, Anuj Share a Moment',
'season': 'Chapter 1',
'description': 'md5:8d74ed2248423b8b06d5c8add4d7a0c0',
'timestamp': 1678149000,
'channel': 'StarPlus',
'series': 'Anupama',
'season_number': 1,
'season_id': 7399,
'upload_date': '20230307',
'episode': 'Anupama, Anuj Share a Moment',
'episode_number': 853,
'duration': 1272,
'channel_id': 3,
},
}, {
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
'only_matching': True,
@ -139,6 +158,7 @@ class HotStarIE(HotStarBaseIE):
'sports': 'match',
'episode': 'episode',
'tv': 'episode',
'shows': 'episode',
None: 'content',
}
@ -304,13 +324,16 @@ def _real_extract(self, url):
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'info_dict': {
'id': '3_2_26',
},
'playlist_mincount': 20,
}, {
'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'only_matching': True,
}, {
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
'only_matching': True,
@ -327,7 +350,7 @@ def _real_extract(self, url):
class HotStarSeasonIE(HotStarBaseIE):
IE_NAME = 'hotstar:season'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
_TESTS = [{
'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
'info_dict': {
@ -346,6 +369,9 @@ class HotStarSeasonIE(HotStarBaseIE):
'id': '8208',
},
'playlist_mincount': 19,
}, {
'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
'only_matching': True,
}]
def _real_extract(self, url):
@ -356,7 +382,7 @@ def _real_extract(self, url):
class HotStarSeriesIE(HotStarBaseIE):
IE_NAME = 'hotstar:series'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
'info_dict': {
@ -375,6 +401,12 @@ class HotStarSeriesIE(HotStarBaseIE):
'id': '435',
},
'playlist_mincount': 267,
}, {
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
'info_dict': {
'id': '1260022017',
},
'playlist_mincount': 940,
}]
def _real_extract(self, url):

View file

@ -1,68 +1,83 @@
import functools
import urllib.parse
import urllib.error
import hashlib
import json
import time
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
jwt_decode_hs256,
mimetype2ext,
qualities,
traverse_obj,
try_call,
unified_timestamp,
)
# https://github.com/yt-dlp/yt-dlp/issues/6671
class IwaraBaseIE(InfoExtractor):
_NETRC_MACHINE = 'iwara'
_USERTOKEN = None
_MEDIATOKEN = None
_NETRC_MACHINE = 'iwara'
def _get_user_token(self, invalidate=False):
if not invalidate and self._USERTOKEN:
return self._USERTOKEN
def _is_token_expired(self, token, token_type):
# User token TTL == ~3 weeks, Media token TTL == ~1 hour
if (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 120):
self.to_screen(f'{token_type} token has expired')
return True
def _get_user_token(self):
username, password = self._get_login_info()
IwaraBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
if not IwaraBaseIE._USERTOKEN or invalidate:
IwaraBaseIE._USERTOKEN = self._download_json(
if not username or not password:
return
user_token = IwaraBaseIE._USERTOKEN or self.cache.load(self._NETRC_MACHINE, username)
if not user_token or self._is_token_expired(user_token, 'User'):
response = self._download_json(
'https://api.iwara.tv/user/login', None, note='Logging in',
data=json.dumps({
headers={'Content-Type': 'application/json'}, data=json.dumps({
'email': username,
'password': password
}).encode('utf-8'),
headers={
}).encode(), expected_status=lambda x: True)
user_token = traverse_obj(response, ('token', {str}))
if not user_token:
error = traverse_obj(response, ('message', {str}))
if 'invalidLogin' in error:
raise ExtractorError('Invalid login credentials', expected=True)
else:
raise ExtractorError(f'Iwara API said: {error or "nothing"}')
self.cache.store(self._NETRC_MACHINE, username, user_token)
IwaraBaseIE._USERTOKEN = user_token
def _get_media_token(self):
self._get_user_token()
if not IwaraBaseIE._USERTOKEN:
return # user has not passed credentials
if not IwaraBaseIE._MEDIATOKEN or self._is_token_expired(IwaraBaseIE._MEDIATOKEN, 'Media'):
IwaraBaseIE._MEDIATOKEN = self._download_json(
'https://api.iwara.tv/user/token', None, note='Fetching media token',
data=b'', headers={
'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}',
'Content-Type': 'application/json'
})['token']
})['accessToken']
self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'}
return self._USERTOKEN
def _get_media_token(self, invalidate=False):
if not invalidate and self._MEDIATOKEN:
return self._MEDIATOKEN
IwaraBaseIE._MEDIATOKEN = self._download_json(
'https://api.iwara.tv/user/token', None, note='Fetching media token',
data=b'', # Need to have some data here, even if it's empty
headers={
'Authorization': f'Bearer {self._get_user_token()}',
'Content-Type': 'application/json'
})['accessToken']
return self._MEDIATOKEN
def _perform_login(self, username, password):
self._get_media_token()
class IwaraIE(IwaraBaseIE):
IE_NAME = 'iwara'
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
# this video cannot be played because of migration
'only_matching': True,
'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
'info_dict': {
'id': 'k2ayoueezfkx6gvq',
@ -79,25 +94,29 @@ class IwaraIE(IwaraBaseIE):
'timestamp': 1677843869,
'modified_timestamp': 1679056362,
},
'skip': 'this video cannot be played because of migration',
}, {
'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
'md5': '20691ce1473ec2766c0788e14c60ce66',
'md5': '7645f966f069b8ec9210efd9130c9aad',
'info_dict': {
'id': '1ywe1sbkqwumpdxz5',
'ext': 'mp4',
'age_limit': 18,
'title': 'Aponia 阿波尼亚SEX Party Tonight 手动脱衣 大奶 裸腿',
'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
'uploader': '龙也zZZ',
'title': 'Aponia アポニア SEX Party Tonight 手の脱衣 巨乳 ',
'description': 'md5:3f60016fff22060eef1ef26d430b1f67',
'uploader': 'Lyu ya',
'uploader_id': 'user792540',
'tags': [
'uncategorized'
],
'like_count': 1809,
'view_count': 25156,
'comment_count': 1,
'like_count': int,
'view_count': int,
'comment_count': int,
'timestamp': 1678732213,
'modified_timestamp': 1679110271,
'modified_timestamp': int,
'thumbnail': 'https://files.iwara.tv/image/thumbnail/581d12b5-46f4-4f15-beb2-cfe2cde5d13d/thumbnail-00.jpg',
'modified_date': '20230614',
'upload_date': '20230313',
},
}, {
'url': 'https://iwara.tv/video/blggmfno8ghl725bg',
@ -112,12 +131,15 @@ class IwaraIE(IwaraBaseIE):
'tags': [
'pee'
],
'like_count': 192,
'view_count': 12119,
'comment_count': 0,
'like_count': int,
'view_count': int,
'comment_count': int,
'timestamp': 1598880567,
'modified_timestamp': 1598908995,
'availability': 'needs_auth',
'modified_timestamp': int,
'upload_date': '20200831',
'modified_date': '20230605',
'thumbnail': 'https://files.iwara.tv/image/thumbnail/7693e881-d302-42a4-a780-f16d66b5dadd/thumbnail-00.jpg',
# 'availability': 'needs_auth',
},
}]
@ -142,17 +164,16 @@ def _extract_formats(self, video_id, fileurl):
def _real_extract(self, url):
video_id = self._match_id(url)
username, password = self._get_login_info()
headers = {
'Authorization': f'Bearer {self._get_media_token()}',
} if username and password else None
video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True, headers=headers)
username, _ = self._get_login_info()
video_data = self._download_json(
f'https://api.iwara.tv/video/{video_id}', video_id,
expected_status=lambda x: True, headers=self._get_media_token())
errmsg = video_data.get('message')
# at this point we can actually get uploaded user info, but do we need it?
if errmsg == 'errors.privateVideo':
self.raise_login_required('Private video. Login if you have permissions to watch')
self.raise_login_required('Private video. Login if you have permissions to watch', method='password')
elif errmsg == 'errors.notFound' and not username:
self.raise_login_required('Video may need login to view')
self.raise_login_required('Video may need login to view', method='password')
elif errmsg: # None if success
raise ExtractorError(f'Iwara says: {errmsg}')
@ -181,15 +202,6 @@ def _real_extract(self, url):
'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
}
def _perform_login(self, username, password):
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
self.write_debug('Skipping logging in')
return
IwaraBaseIE._USERTOKEN = self._get_user_token(True)
self._get_media_token(True)
self.cache.store(self._NETRC_MACHINE, username, IwaraBaseIE._USERTOKEN)
class IwaraUserIE(IwaraBaseIE):
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
@ -200,12 +212,14 @@ class IwaraUserIE(IwaraBaseIE):
'url': 'https://iwara.tv/profile/user792540/videos',
'info_dict': {
'id': 'user792540',
'title': 'Lyu ya',
},
'playlist_mincount': 80,
'playlist_mincount': 70,
}, {
'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
'info_dict': {
'id': 'theblackbirdcalls',
'title': 'TheBlackbirdCalls',
},
'playlist_mincount': 723,
}, {
@ -214,6 +228,13 @@ class IwaraUserIE(IwaraBaseIE):
}, {
'url': 'https://iwara.tv/profile/theblackbirdcalls',
'only_matching': True,
}, {
'url': 'https://www.iwara.tv/profile/lumymmd',
'info_dict': {
'id': 'lumymmd',
'title': 'Lumy MMD',
},
'playlist_mincount': 1,
}]
def _entries(self, playlist_id, user_id, page):
@ -225,7 +246,7 @@ def _entries(self, playlist_id, user_id, page):
'sort': 'date',
'user': user_id,
'limit': self._PER_PAGE,
})
}, headers=self._get_media_token())
for x in traverse_obj(videos, ('results', ..., 'id')):
yield self.url_result(f'https://iwara.tv/video/{x}')
@ -244,7 +265,6 @@ def _real_extract(self, url):
class IwaraPlaylistIE(IwaraBaseIE):
# the ID is an UUID but I don't think it's necessary to write concrete regex
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
IE_NAME = 'iwara:playlist'
_PER_PAGE = 32
@ -260,7 +280,8 @@ class IwaraPlaylistIE(IwaraBaseIE):
def _entries(self, playlist_id, first_page, page):
videos = self._download_json(
'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
query={'page': page, 'limit': self._PER_PAGE}) if page else first_page
query={'page': page, 'limit': self._PER_PAGE},
headers=self._get_media_token()) if page else first_page
for x in traverse_obj(videos, ('results', ..., 'id')):
yield self.url_result(f'https://iwara.tv/video/{x}')
@ -268,7 +289,7 @@ def _real_extract(self, url):
playlist_id = self._match_id(url)
page_0 = self._download_json(
f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
note='Requesting playlist info')
note='Requesting playlist info', headers=self._get_media_token())
return self.playlist_result(
OnDemandPagedList(

View file

@ -1,8 +1,8 @@
import functools
import json
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
HEADRequest,
@ -12,7 +12,10 @@
int_or_none,
mimetype2ext,
parse_qs,
traverse_obj,
try_get,
url_or_none,
urlhandle_detect_ext,
urljoin,
)
@ -52,38 +55,25 @@ def _permanent_url(self, url, claim_name, claim_id):
'/%s:%s' % (claim_name, claim_id))
def _parse_stream(self, stream, url):
stream_value = stream.get('value') or {}
stream_type = stream_value.get('stream_type')
source = stream_value.get('source') or {}
media = stream_value.get(stream_type) or {}
signing_channel = stream.get('signing_channel') or {}
channel_name = signing_channel.get('name')
channel_claim_id = signing_channel.get('claim_id')
channel_url = None
if channel_name and channel_claim_id:
channel_url = self._permanent_url(url, channel_name, channel_claim_id)
stream_type = traverse_obj(stream, ('value', 'stream_type', {str}))
info = traverse_obj(stream, {
'title': ('value', 'title', {str}),
'thumbnail': ('value', 'thumbnail', 'url', {url_or_none}),
'description': ('value', 'description', {str}),
'license': ('value', 'license', {str}),
'timestamp': ('timestamp', {int_or_none}),
'release_timestamp': ('value', 'release_time', {int_or_none}),
'tags': ('value', 'tags', ..., {lambda x: x or None}),
'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}),
})
channel_name = traverse_obj(stream, ('signing_channel', 'name', {str}))
if channel_name and info.get('channel_id'):
info['channel_url'] = self._permanent_url(url, channel_name, info['channel_id'])
info = {
'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
'description': stream_value.get('description'),
'license': stream_value.get('license'),
'timestamp': int_or_none(stream.get('timestamp')),
'release_timestamp': int_or_none(stream_value.get('release_time')),
'tags': stream_value.get('tags'),
'duration': int_or_none(media.get('duration')),
'channel': try_get(signing_channel, lambda x: x['value']['title']),
'channel_id': channel_claim_id,
'channel_url': channel_url,
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
'filesize': int_or_none(source.get('size')),
}
if stream_type == 'audio':
info['vcodec'] = 'none'
else:
info.update({
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
})
return info
@ -186,6 +176,28 @@ class LBRYIE(LBRYBaseIE):
'license': 'None',
},
'params': {'skip_download': True}
}, {
# original quality format w/higher resolution than HLS formats
'url': 'https://odysee.com/@wickedtruths:2/Biotechnological-Invasion-of-Skin-(April-2023):4',
'md5': '305b0b3b369bde1b984961f005b67193',
'info_dict': {
'id': '41fbfe805eb73c8d3012c0c49faa0f563274f634',
'ext': 'mp4',
'title': 'Biotechnological Invasion of Skin (April 2023)',
'description': 'md5:709a2f4c07bd8891cda3a7cc2d6fcf5c',
'channel': 'Wicked Truths',
'channel_id': '23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'channel_url': 'https://odysee.com/@wickedtruths:23d2bbf856b0ceed5b1d7c5960bcc72da5a20cb0',
'timestamp': 1685790036,
'upload_date': '20230603',
'release_timestamp': 1685617473,
'release_date': '20230601',
'duration': 1063,
'thumbnail': 'https://thumbs.odycdn.com/4e6d39da4df0cfdad45f64e253a15959.webp',
'tags': ['smart skin surveillance', 'biotechnology invasion of skin', 'morgellons'],
'license': 'None',
'protocol': 'https', # test for direct mp4 download
},
}, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
'only_matching': True,
@ -221,41 +233,64 @@ def _real_extract(self, url):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
display_id = compat_urllib_parse_unquote(display_id)
display_id = urllib.parse.unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
headers = {'Referer': 'https://odysee.com/'}
if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES:
formats = []
stream_type = traverse_obj(result, ('value', 'stream_type', {str}))
if stream_type in self._SUPPORTED_STREAM_TYPES:
claim_id, is_live = result['claim_id'], False
streaming_url = self._call_api_proxy(
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
# GET request returns original video/audio file if available
ext = urlhandle_detect_ext(self._request_webpage(
streaming_url, display_id, 'Checking for original quality', headers=headers))
if ext != 'm3u8':
formats.append({
'url': streaming_url,
'format_id': 'original',
'quality': 1,
**traverse_obj(result, ('value', {
'ext': ('source', (('name', {determine_ext}), ('media_type', {mimetype2ext}))),
'filesize': ('source', 'size', {int_or_none}),
'width': ('video', 'width', {int_or_none}),
'height': ('video', 'height', {int_or_none}),
}), get_all=False),
'vcodec': 'none' if stream_type == 'audio' else None,
})
# HEAD request returns redirect response to m3u8 URL if available
final_url = self._request_webpage(
HEADRequest(streaming_url), display_id, headers=headers,
note='Downloading streaming redirect url info').geturl()
elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True
live_data = self._download_json(
'https://api.odysee.live/livestream/is_live', claim_id,
query={'channel_claim_id': claim_id},
note='Downloading livestream JSON metadata')['data']
streaming_url = final_url = live_data.get('VideoURL')
final_url = live_data.get('VideoURL')
# Upcoming videos may still give VideoURL
if not live_data.get('Live'):
streaming_url = final_url = None
final_url = None
self.raise_no_formats('This stream is not live', True, claim_id)
else:
raise UnsupportedError(url)
info = self._parse_stream(result, url)
if determine_ext(final_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
else:
info['url'] = streaming_url
formats.extend(self._extract_m3u8_formats(
final_url, display_id, 'mp4', m3u8_id='hls', live=is_live, headers=headers))
return {
**info,
**self._parse_stream(result, url),
'id': claim_id,
'title': result['value']['title'],
'formats': formats,
'is_live': is_live,
'http_headers': headers,
}
@ -299,14 +334,12 @@ def _fetch_page(self, claim_id, url, params, page):
if not (stream_claim_name and stream_claim_id):
continue
info = self._parse_stream(item, url)
info.update({
yield {
**self._parse_stream(item, url),
'_type': 'url',
'id': stream_claim_id,
'title': try_get(item, lambda x: x['value']['title']),
'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
})
yield info
}
def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#')

View file

@ -1,17 +1,17 @@
import base64
import time
import urllib.error
import uuid
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
parse_resolution,
traverse_obj,
try_get,
url_or_none,
urljoin,
)
@ -30,16 +30,18 @@ class MGTVIE(InfoExtractor):
'duration': 7461,
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://w.mgtv.com/b/427837/15588271.html',
'info_dict': {
'id': '15588271',
'ext': 'mp4',
'title': '春日迟迟再出发 沉浸版',
'title': '春日迟迟再出发 沉浸版第1期陆莹结婚半年查出肾炎被离婚 吴雅婷把一半票根退给前夫',
'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
'thumbnail': r're:^https?://.+\.jpg',
'duration': 4026,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://w.mgtv.com/b/333652/7329822.html',
'info_dict': {
@ -50,6 +52,7 @@ class MGTVIE(InfoExtractor):
'thumbnail': r're:^https?://.+\.jpg',
'duration': 2656,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://w.mgtv.com/b/427837/15591647.html',
'only_matching': True,
@ -64,6 +67,13 @@ class MGTVIE(InfoExtractor):
'only_matching': True,
}]
_RESOLUTIONS = {
'标清': ('480p', '854x480'),
'高清': ('540p', '960x540'),
'超清': ('720p', '1280x720'),
'蓝光': ('1080p', '1920x1080'),
}
def _real_extract(self, url):
video_id = self._match_id(url)
tk2 = base64.urlsafe_b64encode(
@ -76,55 +86,60 @@ def _real_extract(self, url):
'type': 'pch5'
}, headers=self.geo_verification_headers())['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)
if error.get('code') == 40005:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
raise ExtractorError(error['msg'], expected=True)
raise
info = api_data['info']
title = info['title'].strip()
stream_data = self._download_json(
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
'pm2': api_data['atc']['pm2'],
'tk2': tk2,
'pm2': api_data['atc']['pm2'],
'video_id': video_id,
'type': 'pch5',
'src': 'intelmgtv',
}, headers=self.geo_verification_headers())['data']
stream_domain = stream_data['stream_domain'][0]
stream_domain = traverse_obj(stream_data, ('stream_domain', ..., {url_or_none}), get_all=False)
formats = []
for idx, stream in enumerate(stream_data['stream']):
stream_path = stream.get('url')
if not stream_path:
continue
format_data = self._download_json(
stream_domain + stream_path, video_id,
note=f'Download video info for format #{idx}')
format_url = format_data.get('info')
for idx, stream in enumerate(traverse_obj(stream_data, ('stream', lambda _, v: v['url']))):
stream_name = traverse_obj(stream, 'name', 'standardName', 'barName', expected_type=str)
resolution = traverse_obj(
self._RESOLUTIONS, (stream_name, 1 if stream.get('scale') == '16:9' else 0))
format_url = traverse_obj(self._download_json(
urljoin(stream_domain, stream['url']), video_id, fatal=False,
note=f'Downloading video info for format {resolution or stream_name}'),
('info', {url_or_none}))
if not format_url:
continue
tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({
'format_id': compat_str(tbr or idx),
'url': url_or_none(format_url),
'format_id': str(tbr or idx),
'url': format_url,
'ext': 'mp4',
'tbr': tbr,
'vcodec': stream.get('videoFormat'),
'acodec': stream.get('audioFormat'),
**parse_resolution(resolution),
'protocol': 'm3u8_native',
'http_headers': {
'Referer': url,
},
'format_note': stream.get('name'),
'format_note': stream_name,
})
return {
'id': video_id,
'title': title,
'formats': formats,
'description': info.get('desc'),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('thumb'),
**traverse_obj(api_data, ('info', {
'title': ('title', {str.strip}),
'description': ('desc', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('thumb', {url_or_none}),
})),
'subtitles': self.extract_subtitles(video_id, stream_domain),
}

View file

@ -1,32 +1,39 @@
import datetime
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
InAdvancePagedList,
orderedSet,
OnDemandPagedList,
remove_end,
str_to_int,
unified_strdate,
)
class MotherlessIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
_VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/|G[VIG]?[A-F0-9]+/)?(?P<id>[A-F0-9]+)'
_TESTS = [{
'url': 'http://motherless.com/AC3FFE1',
'md5': '310f62e325a9fafe64f68c0bccb6e75f',
'url': 'http://motherless.com/EE97006',
'md5': 'cb5e7438f7a3c4e886b7bccc1292a3bc',
'info_dict': {
'id': 'AC3FFE1',
'id': 'EE97006',
'ext': 'mp4',
'title': 'Fucked in the ass while playing PS3',
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
'title': 'Dogging blond Brit getting glazed (comp)',
'categories': ['UK', 'slag', 'whore', 'dogging', 'cunt', 'cumhound', 'big tits', 'Pearl Necklace'],
'upload_date': '20230519',
'uploader_id': 'deathbird',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
'comment_count': int,
'view_count': int,
'like_count': int,
},
'params': {
# Incomplete cert chains
'nocheckcertificate': True,
},
}, {
'url': 'http://motherless.com/532291B',
'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
@ -49,16 +56,36 @@ class MotherlessIE(InfoExtractor):
'id': '633979F',
'ext': 'mp4',
'title': 'Turtlette',
'categories': ['superheroine heroine superher'],
'categories': ['superheroine heroine superher'],
'upload_date': '20140827',
'uploader_id': 'shade0230',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
'like_count': int,
'comment_count': int,
'view_count': int,
},
'params': {
'nocheckcertificate': True,
},
}, {
# no keywords
'url': 'http://motherless.com/8B4BBC1',
'only_matching': True,
'info_dict': {
'id': '8B4BBC1',
'ext': 'mp4',
'title': 'VIDEO00441.mp4',
'categories': [],
'upload_date': '20160214',
'uploader_id': 'NMWildGirl',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
'like_count': int,
'comment_count': int,
'view_count': int,
},
'params': {
'nocheckcertificate': True,
},
}, {
# see https://motherless.com/videos/recent for recent videos with
# uploaded date in "ago" format
@ -72,9 +99,12 @@ class MotherlessIE(InfoExtractor):
'uploader_id': 'anonymous',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
'like_count': int,
'comment_count': int,
'view_count': int,
},
'params': {
'skip_download': True,
'nocheckcertificate': True,
},
}]
@ -128,10 +158,8 @@ def _real_extract(self, url):
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
webpage, 'uploader_id', fatal=False)
categories = self._html_search_meta('keywords', webpage, default=None)
if categories:
categories = [cat.strip() for cat in categories.split(',')]
categories = self._html_search_meta('keywords', webpage, default='')
categories = [cat.strip() for cat in categories.split(',') if cat.strip()]
return {
'id': video_id,
@ -148,102 +176,97 @@ def _real_extract(self, url):
}
class MotherlessGroupIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
class MotherlessPaginatedIE(InfoExtractor):
    # Shared base class for paginated Motherless listings (groups, galleries).
    # Subclasses only need to implement _correct_path() to map the input URL
    # to the canonical paginated listing URL.
    _PAGE_SIZE = 60  # entries per listing page

    def _correct_path(self, url, item_id):
        # Return the canonical listing URL for `item_id`; must be overridden.
        raise NotImplementedError('This method must be implemented by subclasses')

    def _extract_entries(self, webpage, base):
        # Yield url_result entries for every video link on a listing page.
        # Video anchors carry a hex ID path and a title attribute.
        for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)',
                                webpage):
            video_url = urllib.parse.urljoin(base, mobj.group('href'))
            video_id = MotherlessIE.get_temp_id(video_url)
            if video_id:
                yield self.url_result(video_url, MotherlessIE, video_id, mobj.group('title'))

    def _real_extract(self, url):
        item_id = self._match_id(url)
        real_url = self._correct_path(url, item_id)
        # Page 1 is fetched eagerly; it also supplies the playlist title below.
        webpage = self._download_webpage(real_url, item_id, 'Downloading page 1')

        def get_page(idx):
            page = idx + 1
            # Reuse the already-downloaded first page for idx == 0.
            current_page = webpage if not idx else self._download_webpage(
                real_url, item_id, note=f'Downloading page {page}', query={'page': page})
            yield from self._extract_entries(current_page, real_url)

        return self.playlist_result(
            OnDemandPagedList(get_page, self._PAGE_SIZE), item_id,
            remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™'))
class MotherlessGroupIE(MotherlessPaginatedIE):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/g[vifm]?/(?P<id>[a-z0-9_]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'http://motherless.com/g/movie_scenes',
'url': 'http://motherless.com/gv/movie_scenes',
'info_dict': {
'id': 'movie_scenes',
'title': 'Movie Scenes',
'description': 'Hot and sexy scenes from "regular" movies... '
'Beautiful actresses fully nude... A looot of '
'skin! :)Enjoy!',
},
'playlist_mincount': 662,
'playlist_mincount': 540,
}, {
'url': 'http://motherless.com/gv/sex_must_be_funny',
'url': 'http://motherless.com/g/sex_must_be_funny',
'info_dict': {
'id': 'sex_must_be_funny',
'title': 'Sex must be funny',
'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
'any kind!'
},
'playlist_mincount': 0,
'expected_warnings': [
'This group has no videos.',
]
'playlist_count': 0,
}, {
'url': 'https://motherless.com/g/beautiful_cock',
'url': 'https://motherless.com/gv/beautiful_cock',
'info_dict': {
'id': 'beautiful_cock',
'title': 'Beautiful Cock',
'description': 'Group for lovely cocks yours, mine, a friends anything human',
},
'playlist_mincount': 2500,
'playlist_mincount': 2040,
}]
@classmethod
def suitable(cls, url):
return (False if MotherlessIE.suitable(url)
else super(MotherlessGroupIE, cls).suitable(url))
def _correct_path(self, url, item_id):
    # Group listings always live under the /gv/<group_name> path.
    return urllib.parse.urljoin(url, f'/gv/{item_id}')
def _extract_entries(self, webpage, base):
entries = []
for mobj in re.finditer(
r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
webpage):
video_url = compat_urlparse.urljoin(base, mobj.group('href'))
if not MotherlessIE.suitable(video_url):
continue
video_id = MotherlessIE._match_id(video_url)
title = mobj.group('title')
entries.append(self.url_result(
video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
video_title=title))
# Alternative fallback
if not entries:
entries = [
self.url_result(
compat_urlparse.urljoin(base, '/' + entry_id),
ie=MotherlessIE.ie_key(), video_id=entry_id)
for entry_id in orderedSet(re.findall(
r'data-codename=["\']([A-Z0-9]+)', webpage))]
return entries
def _real_extract(self, url):
group_id = self._match_id(url)
page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
webpage = self._download_webpage(page_url, group_id)
title = self._search_regex(
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
description = self._html_search_meta(
'description', webpage, fatal=False)
page_count = str_to_int(self._search_regex(
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
webpage, 'page_count', default=0))
if not page_count:
message = self._search_regex(
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
webpage, 'error_msg', default=None) or 'This group has no videos.'
self.report_warning(message, group_id)
page_count = 1
PAGE_SIZE = 80
class MotherlessGalleryIE(MotherlessPaginatedIE):
_VALID_URL = r'https?://(?:www\.)?motherless\.com/G[VIG]?(?P<id>[A-F0-9]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://motherless.com/GV338999F',
'info_dict': {
'id': '338999F',
'title': 'Random',
},
'playlist_mincount': 190,
}, {
'url': 'https://motherless.com/GVABD6213',
'info_dict': {
'id': 'ABD6213',
'title': 'Cuties',
},
'playlist_mincount': 2,
}, {
'url': 'https://motherless.com/GVBCF7622',
'info_dict': {
'id': 'BCF7622',
'title': 'Vintage',
},
'playlist_count': 0,
}, {
'url': 'https://motherless.com/G035DE2F',
'info_dict': {
'id': '035DE2F',
'title': 'General',
},
'playlist_mincount': 420,
}]
def _get_page(idx):
if idx > 0:
webpage = self._download_webpage(
page_url, group_id, query={'page': idx + 1},
note='Downloading page %d/%d' % (idx + 1, page_count)
)
for entry in self._extract_entries(webpage, url):
yield entry
playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
return {
'_type': 'playlist',
'id': group_id,
'title': title,
'description': description,
'entries': playlist
}
def _correct_path(self, url, item_id):
    # Gallery listings always live under the /GV<gallery_id> path.
    return urllib.parse.urljoin(url, f'/GV{item_id}')

View file

@ -3,7 +3,7 @@
import urllib.error
from .common import InfoExtractor
from ..utils import ExtractorError, parse_iso8601
from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
@ -65,19 +65,20 @@ def _fetch_nebula_bearer_token(self):
return response['token']
def _fetch_video_formats(self, slug):
stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/',
stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
video_id=slug,
auth_type='bearer',
note='Fetching video stream info')
manifest_url = stream_info['manifest']
return self._extract_m3u8_formats_and_subtitles(manifest_url, slug)
return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')
def _build_video_info(self, episode):
fmts, subs = self._fetch_video_formats(episode['slug'])
channel_slug = episode['channel_slug']
channel_title = episode['channel_title']
zype_id = episode.get('zype_id')
return {
'id': episode['zype_id'],
'id': remove_start(episode['id'], 'video_episode:'),
'display_id': episode['slug'],
'formats': fmts,
'subtitles': subs,
@ -99,6 +100,9 @@ def _build_video_info(self, episode):
'uploader_url': f'https://nebula.tv/{channel_slug}',
'series': channel_title,
'creator': channel_title,
'extractor_key': NebulaIE.ie_key(),
'extractor': NebulaIE.IE_NAME,
'_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
}
def _perform_login(self, username=None, password=None):
@ -113,7 +117,7 @@ class NebulaIE(NebulaBaseIE):
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'md5': '14944cfee8c7beeea106320c47560efc',
'info_dict': {
'id': '5c271b40b13fd613090034fd',
'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
'ext': 'mp4',
'title': 'That Time Disney Remade Beauty and the Beast',
'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We werent able to remove it without reducing video quality, so its presented here in its original context.',
@ -137,22 +141,22 @@ class NebulaIE(NebulaBaseIE):
'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
'md5': 'd05739cf6c38c09322422f696b569c23',
'info_dict': {
'id': '5e7e78171aaf320001fbd6be',
'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
'ext': 'mp4',
'title': 'Landing Craft - How The Allies Got Ashore',
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
'upload_date': '20200327',
'timestamp': 1585348140,
'channel': 'Real Engineering',
'channel_id': 'realengineering',
'uploader': 'Real Engineering',
'uploader_id': 'realengineering',
'series': 'Real Engineering',
'channel': 'Real Engineering — The Logistics of D-Day',
'channel_id': 'd-day',
'uploader': 'Real Engineering — The Logistics of D-Day',
'uploader_id': 'd-day',
'series': 'Real Engineering — The Logistics of D-Day',
'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
'creator': 'Real Engineering',
'creator': 'Real Engineering — The Logistics of D-Day',
'duration': 841,
'channel_url': 'https://nebula.tv/realengineering',
'uploader_url': 'https://nebula.tv/realengineering',
'channel_url': 'https://nebula.tv/d-day',
'uploader_url': 'https://nebula.tv/d-day',
'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
},
},
@ -160,7 +164,7 @@ class NebulaIE(NebulaBaseIE):
'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
'md5': 'ebe28a7ad822b9ee172387d860487868',
'info_dict': {
'id': '5e779ebdd157bc0001d1c75a',
'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
'ext': 'mp4',
'title': 'Episode 1: The Draw',
'description': r'contains:Theres free money on offer… if the players can all work together.',
@ -190,7 +194,7 @@ class NebulaIE(NebulaBaseIE):
]
def _fetch_video_metadata(self, slug):
return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
video_id=slug,
auth_type='bearer',
note='Fetching video meta data')

View file

@ -2,12 +2,15 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
parse_duration,
traverse_obj,
unescapeHTML,
unified_timestamp,
url_or_none,
urljoin,
url_or_none
)
@ -492,3 +495,73 @@ class NhkRadioNewsPageIE(InfoExtractor):
def _real_extract(self, url):
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
class NhkRadiruLiveIE(InfoExtractor):
    # Live NHK radio streams (Radio 1, Radio 2, FM).
    # The broadcast area defaults to Tokyo and can be overridden via the
    # `area` extractor argument.
    _GEO_COUNTRIES = ['JP']
    _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/player/\?ch=(?P<id>r[12]|fm)'
    _TESTS = [{
        # radio 1, no area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
        'info_dict': {
            'id': 'r1-tokyo',
            'title': 're:^ネットラジオ第1 東京.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # radio 2, area specified
        # (the area doesnt actually matter, r2 is national)
        'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
        'info_dict': {
            'id': 'r2-fukuoka',
            'title': 're:^ネットラジオ第2 福岡.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
            'live_status': 'is_live',
        },
    }, {
        # fm, area specified
        'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
        'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
        'info_dict': {
            'id': 'fm-sapporo',
            'title': 're:^NHKネットラジオFM 札幌.+$',
            'ext': 'm4a',
            'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
            'live_status': 'is_live',
        }
    }]

    # Maps the URL channel ID to the "now on air" station key in the JSON feed.
    _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}

    def _real_extract(self, url):
        station = self._match_id(url)
        area = self._configuration_arg('area', ['tokyo'])[0]

        # The config XML lists every broadcast area and its stream URLs.
        config = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', station, 'Downloading area information')
        data = config.find(f'.//data//area[.="{area}"]/..')
        if not data:
            raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join(
                [i.text for i in config.findall('.//data//area')]), expected=True)

        # The "now on air" feed URL is an https-less template with an {area}
        # placeholder, filled in with the area's key from the config XML.
        noa_info = self._download_json(
            f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
            station, note=f'Downloading {area} station metadata')
        present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))

        return {
            # e.g. "<service name> <area name>" joined with a space
            'title': ' '.join(traverse_obj(present_info, (('service', 'area',), 'name', {str}))),
            'id': join_nonempty(station, area),
            'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
                'url': 'url',
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })),
            # HLS manifest URL for this station comes from the area's config entry.
            'formats': self._extract_m3u8_formats(data.find(f'{station}hls').text, station),
            'is_live': True,
        }

View file

@ -1,3 +1,5 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
@ -7,6 +9,7 @@
)
from ..utils import (
ExtractorError,
HEADRequest,
float_or_none,
int_or_none,
qualities,
@ -15,6 +18,7 @@
unescapeHTML,
unified_strdate,
unsmuggle_url,
url_or_none,
urlencode_postdata,
)
@ -41,7 +45,7 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'timestamp': 1545580896,
'view_count': int,
'thumbnail': 'https://coub-attachments.akamaized.net/coub_storage/coub/simple/cw_image/c5ac87553bd/608e806a1239c210ab692/1545580913_00026.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
'title': 'Народная забава',
'uploader': 'Nevata',
'upload_date': '20181223',
@ -65,13 +69,14 @@ class OdnoklassnikiIE(InfoExtractor):
'title': str,
'uploader': str,
},
'skip': 'vk extractor error',
}, {
# metadata in JSON
# metadata in JSON, webm_dash with Firefox UA
'url': 'http://ok.ru/video/20079905452',
'md5': '5d2b64756e2af296e3b383a0bc02a6aa',
'md5': '8f477d8931c531374a3e36daec617b2c',
'info_dict': {
'id': '20079905452',
'ext': 'mp4',
'ext': 'webm',
'title': 'Культура меняет нас (прекрасный ролик!))',
'thumbnail': str,
'duration': 100,
@ -81,10 +86,14 @@ class OdnoklassnikiIE(InfoExtractor):
'like_count': int,
'age_limit': 0,
},
'params': {
'format': 'bv[ext=webm]',
'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0'},
},
}, {
# metadataUrl
'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
'md5': 'f8c951122516af72e6e6ffdd3c41103b',
'md5': '2bae2f58eefe1b3d26f3926c4a64d2f3',
'info_dict': {
'id': '63567059965189-0',
'ext': 'mp4',
@ -98,10 +107,11 @@ class OdnoklassnikiIE(InfoExtractor):
'age_limit': 0,
'start_time': 5,
},
'params': {'skip_download': 'm3u8'},
}, {
# YouTube embed (metadataUrl, provider == USER_YOUTUBE)
'url': 'https://ok.ru/video/3952212382174',
'md5': '91749d0bd20763a28d083fa335bbd37a',
'md5': '5fb5f83ce16cb212d6bf887282b5da53',
'info_dict': {
'id': '5axVgHHDBvU',
'ext': 'mp4',
@ -116,7 +126,7 @@ class OdnoklassnikiIE(InfoExtractor):
'live_status': 'not_live',
'view_count': int,
'thumbnail': 'https://i.mycdn.me/i?r=AEHujHvw2RjEbemUCNEorZbxYpb_p_9AcN2FmGik64Krkcmz37YtlY093oAM5-HIEAt7Zi9s0CiBOSDmbngC-I-k&fn=external_8',
'uploader_url': 'http://www.youtube.com/user/MrKewlkid94',
'uploader_url': 'https://www.youtube.com/@MrKewlkid94',
'channel_follower_count': int,
'tags': ['youtube-dl', 'youtube playlists', 'download videos', 'download audio'],
'channel_id': 'UCVGtvURtEURYHtJFUegdSug',
@ -145,7 +155,6 @@ class OdnoklassnikiIE(InfoExtractor):
},
'skip': 'Video has not been found',
}, {
# TODO: HTTP Error 400: Bad Request, it only works if there's no cookies when downloading
'note': 'Only available in mobile webpage',
'url': 'https://m.ok.ru/video/2361249957145',
'info_dict': {
@ -153,8 +162,8 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'mp4',
'title': 'Быковское крещение',
'duration': 3038.181,
'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+',
},
'skip': 'HTTP Error 400',
}, {
'note': 'subtitles',
'url': 'https://ok.ru/video/4249587550747',
@ -226,6 +235,12 @@ class OdnoklassnikiIE(InfoExtractor):
'skip': 'Site no longer embeds',
}]
def _clear_cookies(self, cdn_url):
    # Direct http downloads will fail if CDN cookies are set
    # so we need to reset them after each format extraction.
    # Clears both the shared .mycdn.me domain and the specific CDN host
    # that served `cdn_url`.
    self.cookiejar.clear(domain='.mycdn.me')
    self.cookiejar.clear(domain=urllib.parse.urlparse(cdn_url).hostname)
@classmethod
def _extract_embed_urls(cls, url, webpage):
for x in super()._extract_embed_urls(url, webpage):
@ -364,14 +379,22 @@ def _extract_desktop(self, url):
formats = [{
'url': f['url'],
'ext': 'mp4',
'format_id': f['name'],
} for f in metadata['videos']]
'format_id': f.get('name'),
} for f in traverse_obj(metadata, ('videos', lambda _, v: url_or_none(v['url'])))]
m3u8_url = metadata.get('hlsManifestUrl')
m3u8_url = traverse_obj(metadata, 'hlsManifestUrl', 'ondemandHls')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._clear_cookies(m3u8_url)
for mpd_id, mpd_key in [('dash', 'ondemandDash'), ('webm', 'metadataWebmUrl')]:
mpd_url = metadata.get(mpd_key)
if mpd_url:
formats.extend(self._extract_mpd_formats(
mpd_url, video_id, mpd_id=mpd_id, fatal=False))
self._clear_cookies(mpd_url)
dash_manifest = metadata.get('metadataEmbedded')
if dash_manifest:
@ -390,6 +413,7 @@ def _extract_desktop(self, url):
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
self._clear_cookies(m3u8_url)
rtmp_url = metadata.get('rtmpUrl')
if rtmp_url:
formats.append({
@ -423,6 +447,10 @@ def _extract_mobile(self, url):
r'data-video="(.+?)"', webpage, 'json data')
json_data = self._parse_json(unescapeHTML(json_data), video_id) or {}
redirect_url = self._request_webpage(HEADRequest(
json_data['videoSrc']), video_id, 'Requesting download URL').geturl()
self._clear_cookies(redirect_url)
return {
'id': video_id,
'title': json_data.get('videoName'),
@ -430,7 +458,7 @@ def _extract_mobile(self, url):
'thumbnail': json_data.get('videoPosterSrc'),
'formats': [{
'format_id': 'mobile',
'url': json_data.get('videoSrc'),
'url': redirect_url,
'ext': 'mp4',
}]
}

View file

@ -0,0 +1,94 @@
from .common import InfoExtractor
from ..utils import extract_attributes, merge_dicts, remove_end
class RheinMainTVIE(InfoExtractor):
    """Extractor for rheinmaintv.de broadcast pages.

    The site publishes each clip as a Smooth Streaming (ISM) manifest that
    is referenced either by the player's ``<source src=...>`` tag or, as a
    fallback, by the ``embedUrl`` of a JSON-LD ``VideoObject``.
    """
    _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
    _TESTS = [{
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
        'info_dict': {
            'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
            'ext': 'ismv',  # ismv+isma will be merged into mp4
            'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
            'title': 'Auf dem Weg zur Deutschen Meisterschaft',
            'upload_date': '20221108',
            'view_count': int,
            'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
            'thumbnail': r're:^https://.+\.jpg',
            'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
            'timestamp': 1667933057,
            'duration': 243.0,
        },
        'params': {'skip_download': 'ism'},
    }, {
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
        'info_dict': {
            'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
            'ext': 'ismv',
            'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
            'timestamp': 1668526214,
            'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
            'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
            'view_count': int,
            'thumbnail': r're:^https://.+\.jpg',
            'duration': 345.0,
            'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
            'upload_date': '20221115',
        },
        'params': {'skip_download': 'ism'},
    }, {
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
        'info_dict': {
            'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
            'ext': 'ismv',
            'title': 'Casino Mainz bei den Deutschen Meisterschaften',
            'view_count': int,
            'timestamp': 1668527402,
            'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
            'upload_date': '20221115',
            'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
            'duration': 348.0,
            'thumbnail': r're:^https://.+\.jpg',
            'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
        },
        'params': {'skip_download': 'ism'},
    }, {
        'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id = mobj.group('display_id')
        # e.g. 'foo/vom-07.11.2022' -> id 'foo-vom-07.11.2022'
        video_id = mobj.group('video_id').replace('/', '-')
        webpage = self._download_webpage(url, video_id)

        # The player markup places the poster <img> directly after <source>
        source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
                                         webpage, 'video', group=('source', 'img'))
        source = extract_attributes(source)
        img = extract_attributes(img)

        raw_json_ld = list(self._yield_json_ld(webpage, video_id))
        json_ld = self._json_ld(raw_json_ld, video_id)
        json_ld.pop('url', None)  # drop so JSON-LD cannot override the result URL

        # Fall back to the VideoObject's embedUrl when <source src> is absent.
        # NOTE(review): next() has no default, so a page with neither a
        # <source src> nor a VideoObject would raise StopIteration here —
        # presumably such pages do not occur; confirm if this ever surfaces.
        ism_manifest_url = (
            source.get('src')
            or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject')
        )
        formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id)

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            # Prefer the page headline, then poster/JSON-LD/OpenGraph titles,
            # finally the <title> tag with its trailing ' -' stripped.
            'title':
                self._html_search_regex(r'<h1><span class="title">([^<]*)</span>',
                                        webpage, 'headline', default=None)
                or img.get('title') or json_ld.get('title') or self._og_search_title(webpage)
                or remove_end(self._html_extract_title(webpage), ' -'),
            'alt_title': img.get('alt'),
            'description': json_ld.get('description') or self._og_search_description(webpage),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'),
        }, json_ld)

View file

@ -1,10 +1,15 @@
import itertools
import urllib.error
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
remove_start,
str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
)
@ -51,7 +56,40 @@ def _real_extract(self, url):
}
class RozhlasVltavaIE(InfoExtractor):
class RozhlasBaseIE(InfoExtractor):
    """Shared format extraction for Czech Radio (rozhlas.cz) extractors."""

    def _extract_formats(self, entry, audio_id):
        """Build the format list from an entry's 'audioLinks'.

        'dash' and 'hls' variants are expanded through manifest extraction;
        any other variant becomes a single direct-URL audio-only format.
        HTTP 429 responses are retried (sleeping 1s before each retry);
        other extraction errors are reported as warnings and skipped.
        """
        extracted = []
        valid_links = traverse_obj(
            entry, ('audioLinks', lambda _, v: url_or_none(v['url'])))
        for link in valid_links:
            variant = link.get('variant')
            for attempt in self.RetryManager():
                # Back off briefly before every attempt after the first
                if attempt.attempt > 1:
                    self._sleep(1, audio_id)
                try:
                    if variant == 'dash':
                        extracted.extend(self._extract_mpd_formats(
                            link['url'], audio_id, mpd_id=variant))
                    elif variant == 'hls':
                        extracted.extend(self._extract_m3u8_formats(
                            link['url'], audio_id, 'm4a', m3u8_id=variant))
                    else:
                        extracted.append({
                            'url': link['url'],
                            'ext': variant,
                            'format_id': variant,
                            'abr': int_or_none(link.get('bitrate')),
                            'acodec': variant,
                            'vcodec': 'none',
                        })
                except ExtractorError as err:
                    # Only rate limiting (429) is worth retrying; anything
                    # else is surfaced as a warning and the variant skipped.
                    if isinstance(err.cause, urllib.error.HTTPError) and err.cause.code == 429:
                        attempt.error = err.cause
                    else:
                        self.report_warning(err.msg)
        return extracted
class RozhlasVltavaIE(RozhlasBaseIE):
_VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
@ -168,33 +206,14 @@ class RozhlasVltavaIE(InfoExtractor):
}]
def _extract_video(self, entry):
formats = []
audio_id = entry['meta']['ga']['contentId']
for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
ext = audio.get('variant')
if ext == 'dash':
formats.extend(self._extract_mpd_formats(
audio['url'], audio_id, mpd_id=ext, fatal=False))
elif ext == 'hls':
formats.extend(self._extract_m3u8_formats(
audio['url'], audio_id, 'm4a', m3u8_id=ext, fatal=False))
else:
formats.append({
'url': audio['url'],
'ext': ext,
'format_id': ext,
'abr': int_or_none(audio.get('bitrate')),
'acodec': ext,
'vcodec': 'none',
})
chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))
return {
'id': audio_id,
'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
'chapter_number': chapter_number,
'formats': formats,
'formats': self._extract_formats(entry, audio_id),
**traverse_obj(entry, {
'title': ('meta', 'ga', 'contentName'),
'description': 'title',
@ -219,3 +238,106 @@ def _real_extract(self, url):
'title': traverse_obj(data, ('series', 'title')),
'entries': map(self._extract_video, data['playlist']),
}
class MujRozhlasIE(RozhlasBaseIE):
    """Extractor for mujrozhlas.cz (Czech Radio on-demand portal).

    Handles three page kinds, distinguished by the ``siteEntityBundle``
    value embedded in the page: a single episode, a serial, or a whole
    show (the latter two are returned as paginated playlists).
    """
    _VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        # single episode extraction
        'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
        'md5': '6f8fd68663e64936623e67c152a669e0',
        'info_dict': {
            'id': '10739193',
            'ext': 'mp3',
            'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
            'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
            'timestamp': 1684915200,
            'modified_timestamp': 1684922446,
            'series': 'Vykopávky',
            'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
            'channel_id': 'radio-wave',
            'upload_date': '20230524',
            'modified_date': '20230524',
        },
    }, {
        # serial extraction
        'url': 'https://www.mujrozhlas.cz/radiokniha/jaroslava-janackova-pribeh-tajemneho-psani-o-pramenech-genezi-babicky',
        'playlist_mincount': 7,
        'info_dict': {
            'id': 'bb2b5f4e-ffb4-35a6-a34a-046aa62d6f6b',
            'title': 'Jaroslava Janáčková: Příběh tajemného psaní. O pramenech a genezi Babičky',
            'description': 'md5:7434d8fac39ac9fee6df098e11dfb1be',
        },
    }, {
        # show extraction
        'url': 'https://www.mujrozhlas.cz/nespavci',
        'playlist_mincount': 14,
        'info_dict': {
            'id': '09db9b37-d0f4-368c-986a-d3439f741f08',
            'title': 'Nespavci',
            'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
        },
    }]

    def _call_api(self, path, item_id, msg='API JSON'):
        # Thin wrapper around the mujrozhlas JSON API; every payload of
        # interest lives under the top-level 'data' key.
        return self._download_json(
            f'https://api.mujrozhlas.cz/{path}/{item_id}', item_id,
            note=f'Downloading {msg}', errnote=f'Failed to download {msg}')['data']

    def _extract_audio_entry(self, entry):
        # Map one API episode object to an info dict; format extraction
        # (dash/hls/direct) is shared with RozhlasVltavaIE via RozhlasBaseIE.
        audio_id = entry['meta']['ga']['contentId']
        return {
            'id': audio_id,
            'formats': self._extract_formats(entry['attributes'], audio_id),
            **traverse_obj(entry, {
                'title': ('attributes', 'title'),
                'description': ('attributes', 'description'),
                'episode_number': ('attributes', 'part'),
                'series': ('attributes', 'mirroredShow', 'title'),
                'chapter': ('attributes', 'mirroredSerial', 'title'),
                'artist': ('meta', 'ga', 'contentAuthor'),
                'channel_id': ('meta', 'ga', 'contentCreator'),
                'timestamp': ('attributes', 'since', {unified_timestamp}),
                'modified_timestamp': ('attributes', 'updated', {unified_timestamp}),
                'thumbnail': ('attributes', 'asset', 'url', {url_or_none}),
            })
        }

    def _entries(self, api_url, playlist_id):
        # Follow the API's 'links.next' pagination until exhausted, yielding
        # only entries that carry a usable contentId.
        for page in itertools.count(1):
            episodes = self._download_json(
                api_url, playlist_id, note=f'Downloading episodes page {page}',
                errnote=f'Failed to download episodes page {page}', fatal=False)
            for episode in traverse_obj(episodes, ('data', lambda _, v: v['meta']['ga']['contentId'])):
                yield self._extract_audio_entry(episode)
            api_url = traverse_obj(episodes, ('links', 'next', {url_or_none}))
            if not api_url:
                break

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The page embeds its metadata as a JS object: `var dl = {...}`
        info = self._search_json(r'\bvar\s+dl\s*=', webpage, 'info json', display_id)
        entity = info['siteEntityBundle']

        if entity == 'episode':
            return self._extract_audio_entry(self._call_api(
                'episodes', info['contentId'], 'episode info API JSON'))

        elif entity in ('show', 'serial'):
            # Shows identify their playlist by the UUID prefix of
            # 'contentShow' ("<uuid>:<slug>"); serials use 'contentId'.
            playlist_id = info['contentShow'].split(':')[0] if entity == 'show' else info['contentId']
            data = self._call_api(f'{entity}s', playlist_id, f'{entity} playlist JSON')
            api_url = data['relationships']['episodes']['links']['related']
            return self.playlist_result(
                self._entries(api_url, playlist_id), playlist_id,
                **traverse_obj(data, ('attributes', {
                    'title': 'title',
                    'description': 'description',
                })))

        else:
            # `entity == 'person'` not implemented yet by API, ref:
            # https://api.mujrozhlas.cz/persons/8367e456-2a57-379a-91bb-e699619bea49/participation
            raise ExtractorError(f'Unsupported entity type "{entity}"')

View file

@ -144,7 +144,7 @@ def _extract_embed_urls(cls, url, webpage):
if embeds:
return embeds
return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)

View file

@ -73,7 +73,10 @@ def _real_extract(self, url):
key = bytes_to_intlist(compat_b64decode(data_json['key']))
iv = [0] * 16
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
headers = {'stream_key': data_json['stream_key']}
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers=headers)
for fmt in formats:
fmt['http_headers'] = headers
release_date = self._html_search_regex(
(r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),

View file

@ -10,6 +10,8 @@
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
try_call,
try_get,
)
@ -77,8 +79,10 @@ def _perform_login(self, username, password):
self._HEADERS['device_id'] = self._get_device_id()
self._HEADERS['content-type'] = 'application/json'
if username.lower() == 'token' and len(password) > 1198:
if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
self._HEADERS['authorization'] = password
self.report_login()
return
elif len(username) != 10 or not username.isdigit():
raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')

View file

@ -42,14 +42,13 @@ def _real_extract(self, url):
elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
raise UserNotLive(video_id=video_id)
server = traverse_obj(data, ('viewCam', 'viewServers', 'flashphoner-hls'), expected_type=str)
model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)
formats = []
for host in traverse_obj(data, ('config', 'data', (
(('features', 'featuresV2'), 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
formats = self._extract_m3u8_formats(
f'https://b-{server}.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
f'https://edge-hls.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True)
if formats:
break

View file

@ -2,7 +2,7 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import str_or_none, traverse_obj
from ..utils import js_to_json, str_or_none, traverse_obj
class SubstackIE(InfoExtractor):
@ -14,7 +14,7 @@ class SubstackIE(InfoExtractor):
'id': '47660949',
'ext': 'mp4',
'title': 'I MADE A VLOG',
'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6',
'description': 'md5:9248af9a759321e1027226f988f54d96',
'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
'uploader': 'Maybe Baby',
'uploader_id': '33628',
@ -77,7 +77,9 @@ def _real_extract(self, url):
display_id, username = self._match_valid_url(url).group('id', 'username')
webpage = self._download_webpage(url, display_id)
webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id)
webpage_info = self._parse_json(self._search_json(
r'window\._preloads\s*=\s*JSON\.parse\(', webpage, 'json string',
display_id, transform_source=js_to_json, contains_pattern=r'"{(?s:.+)}"'), display_id)
post_type = webpage_info['post']['type']
formats, subtitles = [], {}

View file

@ -1,8 +1,13 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
extract_attributes,
get_element_by_id,
get_element_html_by_class,
int_or_none,
str_or_none,
traverse_obj,
url_or_none,
)
@ -21,7 +26,15 @@ class SverigesRadioBaseIE(InfoExtractor):
}
def _real_extract(self, url):
audio_id = self._match_id(url)
audio_id, display_id = self._match_valid_url(url).group('id', 'slug')
if not audio_id:
webpage = self._download_webpage(url, display_id)
audio_id = (
traverse_obj(
get_element_html_by_class('audio-button', webpage),
({extract_attributes}, ('data-audio-id', 'data-publication-id')), get_all=False)
or self._parse_json(get_element_by_id('gtm-metadata', webpage), display_id)['pageId'])
query = {
'id': audio_id,
'type': self._AUDIO_TYPE,
@ -30,7 +43,6 @@ def _real_extract(self, url):
item = self._download_json(
self._BASE_URL + 'audiometadata', audio_id,
'Downloading audio JSON metadata', query=query)['items'][0]
title = item['subtitle']
query['format'] = 'iis'
urls = []
@ -61,18 +73,20 @@ def _real_extract(self, url):
return {
'id': audio_id,
'title': title,
'formats': formats,
'series': item.get('title'),
'duration': int_or_none(item.get('duration')),
'thumbnail': item.get('displayimageurl'),
'description': item.get('description'),
**traverse_obj(item, {
'title': 'subtitle',
'series': 'title',
'duration': ('duration', {int_or_none}),
'thumbnail': ('displayimageurl', {url_or_none}),
'description': 'description',
}),
}
class SverigesRadioPublicationIE(SverigesRadioBaseIE):
IE_NAME = 'sverigesradio:publication'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?(?:artikel|gruppsida)(?:\.aspx\?.*?\bartikel=(?P<id>[0-9]+)|/(?P<slug>[\w-]+))'
_TESTS = [{
'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
'md5': '6a4917e1923fccb080e5a206a5afa542',
@ -85,6 +99,18 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE):
'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://sverigesradio.se/artikel/tysk-fotbollsfeber-bayern-munchens-10-ariga-segersvit-kan-brytas',
'md5': 'f8a914ad50f491bb74eed403ab4bfef6',
'info_dict': {
'id': '8360345',
'ext': 'm4a',
'title': 'Tysk fotbollsfeber när Bayern Münchens 10-åriga segersvit kan brytas',
'series': 'Radiosporten',
'description': 'md5:5254610e20ce527ecb3a6102a06dcc5f',
'duration': 72,
'thumbnail': r're:^https?://.*\.jpg',
},
}, {
'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
'only_matching': True,
@ -94,8 +120,8 @@ class SverigesRadioPublicationIE(SverigesRadioBaseIE):
class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
IE_NAME = 'sverigesradio:episode'
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
_TEST = {
_VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?:(?P<id>\d+)|(?P<slug>[\w-]+))(?:$|[#?])'
_TESTS = [{
'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
'md5': '20dc4d8db24228f846be390b0c59a07c',
'info_dict': {
@ -106,6 +132,18 @@ class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
'title': 'Metoo och valen',
'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
'thumbnail': r're:^https?://.*\.jpg',
}
}
},
}, {
'url': 'https://sverigesradio.se/avsnitt/p4-live-med-first-aid-kit-scandinavium-mars-2023',
'md5': 'ce17fb82520a8033dbb846993d5589fe',
'info_dict': {
'id': '2160416',
'ext': 'm4a',
'title': 'P4 Live med First Aid Kit',
'description': 'md5:6d5b78eed3d2b65f6de04daa45e9285d',
'thumbnail': r're:^https?://.*\.jpg',
'series': 'P4 Live',
'duration': 5640,
},
}]
_AUDIO_TYPE = 'episode'

View file

@ -2,10 +2,12 @@
from .common import InfoExtractor
from ..utils import (
js_to_json,
UnsupportedError,
extract_attributes,
try_get,
int_or_none,
js_to_json,
parse_iso8601,
try_get,
)
@ -14,36 +16,38 @@ class TagesschauIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
'md5': '7a7287612fa881a1ae1d087df45c2fd6',
'md5': 'ccb9359bf8c4795836e43759f3408a93',
'info_dict': {
'id': 'video-102143-1',
'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
'duration': 138,
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633',
'md5': '5c15e8f3da049e48829ec9786d835536',
'info_dict': {
'id': 'ts-5727-1',
'ext': 'mp4',
'title': 'Ganze Sendung',
'duration': 932,
},
}, {
# exclusive audio
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
'md5': '4cf22023c285f35e99c24d290ba58cc9',
'md5': '4bff8f23504df56a0d86ed312d654182',
'info_dict': {
'id': 'audio-29417-1',
'ext': 'mp3',
'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
},
}, {
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
'md5': '12cfb212d9325b5ba0d52b625f1aa61c',
'md5': 'f049fa1698d7564e9ca4c3325108f034',
'info_dict': {
'id': 'bnd-303-1',
'ext': 'mp4',
'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa',
'ext': 'mp3',
'title': 'Das Siegel des Bundesnachrichtendienstes | dpa',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
@ -51,13 +55,24 @@ class TagesschauIE(InfoExtractor):
'id': 'afd-parteitag-135',
'title': 'AfD',
},
'playlist_count': 20,
'playlist_mincount': 15,
}, {
'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
'info_dict': {
'id': 'audio-29417-1',
'ext': 'mp3',
'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
},
}, {
'url': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-327.html',
'info_dict': {
'id': 'podcast-11km-327',
'ext': 'mp3',
'title': 'Gewalt in der Kita Wenn Erzieher:innen schweigen',
'upload_date': '20230322',
'timestamp': 1679482808,
'thumbnail': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-329~_v-original.jpg',
'description': 'md5:dad059931fe4b3693e3656e93a249848',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
@ -117,7 +132,7 @@ def _real_extract(self, url):
formats = []
if media_url.endswith('master.m3u8'):
formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
elif media_url.endswith('.hi.mp3') and media_url.startswith('https://download'):
elif media_url.endswith('.mp3'):
formats = [{
'url': media_url,
'vcodec': 'none',
@ -130,20 +145,19 @@ def _real_extract(self, url):
'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
'formats': formats
})
if not entries:
raise UnsupportedError(url)
if len(entries) > 1:
return self.playlist_result(entries, display_id, title)
formats = entries[0]['formats']
video_info = self._search_json_ld(webpage, video_id)
description = video_info.get('description')
thumbnail = self._og_search_thumbnail(webpage) or video_info.get('thumbnail')
timestamp = video_info.get('timestamp')
title = title or video_info.get('description')
return {
'id': display_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
'timestamp': timestamp,
'description': description,
'thumbnail': self._og_search_thumbnail(webpage),
'formats': entries[0]['formats'],
'timestamp': parse_iso8601(self._html_search_meta('date', webpage)),
'description': self._og_search_description(webpage),
'duration': entries[0]['duration'],
}

View file

@ -163,11 +163,9 @@ class VQQBaseIE(TencentBaseIE):
_REFERER = 'v.qq.com'
def _get_webpage_metadata(self, webpage, video_id):
return self._parse_json(
self._search_regex(
r'(?s)<script[^>]*>[^<]*window\.__pinia\s*=\s*([^<]+)</script>',
webpage, 'pinia data', fatal=False),
video_id, transform_source=js_to_json, fatal=False)
return self._search_json(
r'<script[^>]*>[^<]*window\.__(?:pinia|PINIA__)\s*=',
webpage, 'pinia data', video_id, transform_source=js_to_json, fatal=False)
class VQQVideoIE(VQQBaseIE):
@ -176,7 +174,7 @@ class VQQVideoIE(VQQBaseIE):
_TESTS = [{
'url': 'https://v.qq.com/x/page/q326831cny0.html',
'md5': '84568b3722e15e9cd023b5594558c4a7',
'md5': 'b11c9cb781df710d686b950376676e2a',
'info_dict': {
'id': 'q326831cny0',
'ext': 'mp4',
@ -187,7 +185,7 @@ class VQQVideoIE(VQQBaseIE):
},
}, {
'url': 'https://v.qq.com/x/page/o3013za7cse.html',
'md5': 'cc431c4f9114a55643893c2c8ebf5592',
'md5': 'a1bcf42c6d28c189bd2fe2d468abb287',
'info_dict': {
'id': 'o3013za7cse',
'ext': 'mp4',
@ -208,6 +206,7 @@ class VQQVideoIE(VQQBaseIE):
'series': '鸡毛飞上天',
'format_id': r're:^shd',
},
'skip': '404',
}, {
'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html',
'md5': 'fadd10bf88aec3420f06f19ee1d24c5b',
@ -220,6 +219,7 @@ class VQQVideoIE(VQQBaseIE):
'series': '青年理工工作者生活研究所',
'format_id': r're:^shd',
},
'params': {'skip_download': 'm3u8'},
}, {
# Geo-restricted to China
'url': 'https://v.qq.com/x/cover/mcv8hkc8zk8lnov/x0036x5qqsr.html',

View file

@ -8,7 +8,7 @@ class TestURLIE(InfoExtractor):
""" Allows addressing of the test cases as test:yout.*be_1 """
IE_DESC = False # Do not list
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
_VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>\d+|all))?$'
def _real_extract(self, url):
from . import gen_extractor_classes
@ -36,6 +36,10 @@ def _real_extract(self, url):
extractor = matching_extractors[0]
testcases = tuple(extractor.get_testcases(True))
if num == 'all':
return self.playlist_result(
[self.url_result(tc['url'], extractor) for tc in testcases],
url, f'{extractor.IE_NAME} tests')
try:
tc = testcases[int(num or 0)]
except IndexError:
@ -43,4 +47,4 @@ def _real_extract(self, url):
f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True)
self.to_screen(f'Test URL: {tc["url"]}')
return self.url_result(tc['url'])
return self.url_result(tc['url'], extractor)

View file

@ -62,7 +62,7 @@ def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
return self._download_json(
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)',
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
'Accept': 'application/json',
}, query=query)
@ -79,11 +79,11 @@ def _build_api_query(self, query, app_version, manifest_app_version):
'_rticket': int(time.time() * 1000),
'ts': int(time.time()),
'device_brand': 'Google',
'device_type': 'Pixel 4',
'device_type': 'Pixel 7',
'device_platform': 'android',
'resolution': '1080*1920',
'resolution': '1080*2400',
'dpi': 420,
'os_version': '10',
'os_version': '13',
'os_api': '29',
'carrier_region': 'US',
'sys_region': 'US',
@ -218,8 +218,8 @@ def mp3_meta(url):
def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
if res:
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height'))
known_resolutions[res].setdefault('width', add_meta.get('width'))
known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
parsed_meta.update(known_resolutions.get(res, {}))
add_meta.setdefault('height', int_or_none(res[:-1]))
return [{
@ -624,6 +624,32 @@ class TikTokIE(TikTokBaseIE):
'thumbnails': 'count:3',
},
'expected_warnings': ['Unable to find video in feed'],
}, {
# 1080p format
'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830',
'md5': '982512017a8a917124d5a08c8ae79621',
'info_dict': {
'id': '7107337212743830830',
'ext': 'mp4',
'title': 'new music video 4 dont come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
'description': 'new music video 4 dont come backkkk🧸🖤 i hope u enjoy !! @musicontiktok',
'uploader': 'tatemcrae',
'uploader_id': '86328792343818240',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'creator': 't8',
'artist': 't8',
'track': 'original sound',
'upload_date': '20220609',
'timestamp': 1654805899,
'duration': 150,
'view_count': int,
'like_count': int,
'repost_count': int,
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
'params': {'format': 'bytevc1_1080p_808907-0'},
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',

View file

@ -2,8 +2,11 @@
from .common import InfoExtractor
from ..utils import (
bool_or_none,
int_or_none,
parse_iso8601,
traverse_obj,
url_or_none,
)
@ -20,19 +23,25 @@ class TV4IE(InfoExtractor):
sport/|
)
)(?P<id>[0-9]+)'''
_GEO_COUNTRIES = ['SE']
_GEO_BYPASS = False
_TESTS = [
{
# not geo-restricted
'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
'md5': 'cb837212f342d77cec06e6dad190e96d',
'info_dict': {
'id': '2491650',
'ext': 'mp4',
'title': 'Kalla Fakta 5 (english subtitles)',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': int,
'description': '2491650',
'series': 'Kalla fakta',
'duration': 1335,
'thumbnail': r're:^https?://[^/?#]+/api/v2/img/',
'timestamp': 1385373240,
'upload_date': '20131125',
},
'params': {'skip_download': 'm3u8'},
'expected_warnings': ['Unable to download f4m manifest'],
},
{
'url': 'http://www.tv4play.se/iframe/video/3054113',
@ -46,6 +55,7 @@ class TV4IE(InfoExtractor):
'timestamp': int,
'upload_date': '20150130',
},
'skip': '404 Not Found',
},
{
'url': 'http://www.tv4play.se/sport/3060959',
@ -69,29 +79,28 @@ class TV4IE(InfoExtractor):
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._download_json(
'https://playback-api.b17g.net/asset/%s' % video_id,
video_id, 'Downloading video info JSON', query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls,dash',
'drm': 'widevine',
})['metadata']
title = info['title']
manifest_url = self._download_json(
'https://playback-api.b17g.net/media/' + video_id,
video_id, query={
def _call_api(self, endpoint, video_id, headers=None, query={}):
return self._download_json(
f'https://playback2.a2d.tv/{endpoint}/{video_id}', video_id,
f'Downloading {endpoint} API JSON', headers=headers, query={
'service': 'tv4',
'device': 'browser',
'protocol': 'hls',
})['playbackItem']['manifestUrl']
formats = []
subtitles = {}
**query,
})
def _real_extract(self, url):
video_id = self._match_id(url)
info = traverse_obj(self._call_api('asset', video_id, query={
'protocol': 'hls,dash',
'drm': 'widevine',
}), ('metadata', {dict})) or {}
manifest_url = self._call_api(
'play', video_id, headers=self.geo_verification_headers())['playbackItem']['manifestUrl']
formats, subtitles = [], {}
fmts, subs = self._extract_m3u8_formats_and_subtitles(
manifest_url, video_id, 'mp4',
@ -117,20 +126,24 @@ def _real_extract(self, url):
subtitles = self._merge_subtitles(subtitles, subs)
if not formats and info.get('is_geo_restricted'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
self.raise_geo_restricted(
'This video is not available from your location due to geo-restriction, or not being authenticated',
countries=['SE'])
return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
'description': info.get('description'),
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
'is_live': info.get('isLive') is True,
'series': info.get('seriesTitle'),
'season_number': int_or_none(info.get('seasonNumber')),
'episode': info.get('episodeTitle'),
'episode_number': int_or_none(info.get('episodeNumber')),
**traverse_obj(info, {
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': (('broadcast_date_time', 'broadcastDateTime'), {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('image', {url_or_none}),
'is_live': ('isLive', {bool_or_none}),
'series': ('seriesTitle', {str}),
'season_number': ('seasonNumber', {int_or_none}),
'episode': ('episodeTitle', {str}),
'episode_number': ('episodeNumber', {int_or_none}),
}, get_all=False),
}

View file

@ -488,9 +488,9 @@ def _call_api(self, resource, video_id, query={}, **kwargs):
f'{self._API_BASE_URL}/{resource}', video_id,
query={'lang': 'pl', 'platform': 'BROWSER', **query},
expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
if is_valid(urlh.status):
if is_valid(urlh.getcode()):
return document
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.getcode()})')
def _parse_video(self, video, with_url=True):
info_dict = traverse_obj(video, {

View file

@ -60,7 +60,7 @@ class TwitchBaseIE(InfoExtractor):
@property
def _CLIENT_ID(self):
return self._configuration_arg(
'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key=TwitchStreamIE, casesense=True)[0]
'client_id', ['ue6666qo983tsx6so1t0vnawi233wa'], ie_key='Twitch', casesense=True)[0]
def _perform_login(self, username, password):
def fail(message):

View file

@ -3,7 +3,6 @@
from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
from ..compat import functools # isort: split
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
@ -30,11 +29,67 @@
class TwitterBaseIE(InfoExtractor):
_NETRC_MACHINE = 'twitter'
_API_BASE = 'https://api.twitter.com/1.1/'
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
_guest_token = None
_flow_token = None
# Static payload that opens Twitter's "onboarding" login flow.
# `subtask_versions` mirrors the versions the official web client advertises;
# the compact separators=(',', ':') encoding matches the client's wire format.
_LOGIN_INIT_DATA = json.dumps({
'input_flow_data': {
'flow_context': {
'debug_overrides': {},
'start_location': {
'location': 'unknown'
}
}
},
'subtask_versions': {
'action_list': 2,
'alert_dialog': 1,
'app_download_cta': 1,
'check_logged_in_account': 1,
'choice_selection': 3,
'contacts_live_sync_permission_prompt': 0,
'cta': 7,
'email_verification': 2,
'end_flow': 1,
'enter_date': 1,
'enter_email': 2,
'enter_password': 5,
'enter_phone': 2,
'enter_recaptcha': 1,
'enter_text': 5,
'enter_username': 2,
'generic_urt': 3,
'in_app_notification': 1,
'interest_picker': 3,
'js_instrumentation': 1,
'menu_dialog': 1,
'notifications_permission_prompt': 2,
'open_account': 2,
'open_home_timeline': 1,
'open_link': 1,
'phone_verification': 4,
'privacy_options': 1,
'security_key': 3,
'select_avatar': 4,
'select_banner': 2,
'settings_list': 7,
'show_code': 1,
'sign_up': 2,
'sign_up_review': 4,
'tweet_selection_urt': 1,
'update_users': 1,
'upload_media': 1,
'user_recommendations_list': 4,
'user_recommendations_urt': 1,
'wait_spinner': 3,
'web_modal': 1
}
}, separators=(',', ':')).encode()
def _extract_variant_formats(self, variant, video_id):
variant_url = variant.get('url')
@ -86,18 +141,151 @@ def _search_dimensions_in_video_url(a_format, video_url):
'height': int(m.group('height')),
})
@functools.cached_property
@property
def is_logged_in(self):
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
def _call_api(self, path, video_id, query={}, graphql=False):
cookies = self._get_cookies(self._API_BASE)
# Obtain a fresh guest token from the Twitter API and cache it on the class
# (self._guest_token). Raises ExtractorError when no token is returned.
def _fetch_guest_token(self, headers, display_id):
# Drop any stale token so the activation request is made unauthenticated
headers.pop('x-guest-token', None)
# POST with an empty body; response JSON carries {'guest_token': ...}
self._guest_token = traverse_obj(self._download_json(
f'{self._API_BASE}guest/activate.json', display_id,
'Downloading guest token', data=b'', headers=headers), 'guest_token')
if not self._guest_token:
raise ExtractorError('Could not retrieve guest token')
# Build the base API request headers: the static bearer Authorization plus,
# when a 'ct0' cookie exists, the CSRF token Twitter requires to be mirrored
# into the 'x-csrf-token' header.
def _set_base_headers(self):
headers = self._AUTH.copy()
csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
if csrf_token:
headers['x-csrf-token'] = csrf_token
return headers
csrf_cookie = cookies.get('ct0')
if csrf_cookie:
headers['x-csrf-token'] = csrf_cookie.value
# Perform one step of the onboarding login flow. Returns the ID of the next
# subtask to execute and stores the flow token required by the next request.
# Raises ExtractorError on API-reported errors or a missing subtask.
def _call_login_api(self, note, headers, query={}, data=None):
# expected_status=400: the API reports flow errors with HTTP 400 bodies
# that still contain a parseable JSON error message
response = self._download_json(
f'{self._API_BASE}onboarding/task.json', None, note,
headers=headers, query=query, data=data, expected_status=400)
error = traverse_obj(response, ('errors', 0, 'message', {str}))
if error:
raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
elif traverse_obj(response, 'status') != 'success':
raise ExtractorError('Login was unsuccessful')
# First advertised subtask is the next step of the flow
subtask = traverse_obj(
response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
if not subtask:
raise ExtractorError('Twitter API did not return next login subtask')
self._flow_token = response['flow_token']
return subtask
# Log in to Twitter by driving the onboarding subtask state machine:
# each _call_login_api step returns the next subtask ID, which is answered
# in turn until the auth_token cookie is granted (is_logged_in becomes True).
def _perform_login(self, username, password):
if self.is_logged_in:
return
# Prime cookies (ct0 etc.) before building headers
self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
headers = self._set_base_headers()
self._fetch_guest_token(headers, None)
headers.update({
'content-type': 'application/json',
'x-guest-token': self._guest_token,
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
'Referer': 'https://twitter.com/',
'Origin': 'https://twitter.com',
})
# Helper: wrap subtask responses in the compact JSON envelope the API expects
def build_login_json(*subtask_inputs):
return json.dumps({
'flow_token': self._flow_token,
'subtask_inputs': subtask_inputs
}, separators=(',', ':')).encode()
# Helper: generic "enter_text" subtask response (2FA codes, identifiers, ...)
def input_dict(subtask_id, text):
return {
'subtask_id': subtask_id,
'enter_text': {
'text': text,
'link': 'next_link'
}
}
next_subtask = self._call_login_api(
'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
# Loop until the auth_token cookie appears; each branch answers one subtask
while not self.is_logged_in:
if next_subtask == 'LoginJsInstrumentationSubtask':
# JS challenge: an empty JSON object is accepted as the response
next_subtask = self._call_login_api(
'Submitting JS instrumentation response', headers, data=build_login_json({
'subtask_id': next_subtask,
'js_instrumentation': {
'response': '{}',
'link': 'next_link'
}
}))
elif next_subtask == 'LoginEnterUserIdentifierSSO':
next_subtask = self._call_login_api(
'Submitting username', headers, data=build_login_json({
'subtask_id': next_subtask,
'settings_list': {
'setting_responses': [{
'key': 'user_identifier',
'response_data': {
'text_data': {
'result': username
}
}
}],
'link': 'next_link'
}
}))
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
# Twitter asks for a second identifier; reuse the --twofactor prompt
next_subtask = self._call_login_api(
'Submitting alternate identifier', headers,
data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
'one of username, phone number or email that was not used as --username'))))
elif next_subtask == 'LoginEnterPassword':
next_subtask = self._call_login_api(
'Submitting password', headers, data=build_login_json({
'subtask_id': next_subtask,
'enter_password': {
'password': password,
'link': 'next_link'
}
}))
elif next_subtask == 'AccountDuplicationCheck':
next_subtask = self._call_login_api(
'Submitting account duplication check', headers, data=build_login_json({
'subtask_id': next_subtask,
'check_logged_in_account': {
'link': 'AccountDuplicationCheck_false'
}
}))
elif next_subtask == 'LoginTwoFactorAuthChallenge':
next_subtask = self._call_login_api(
'Submitting 2FA token', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('two-factor authentication token'))))
elif next_subtask == 'LoginAcid':
next_subtask = self._call_login_api(
'Submitting confirmation code', headers, data=build_login_json(input_dict(
next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
elif next_subtask == 'LoginSuccessSubtask':
# Success subtask without the cookie means login silently failed
raise ExtractorError('Twitter API did not grant auth token cookie')
else:
raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
self.report_login()
def _call_api(self, path, video_id, query={}, graphql=False):
headers = self._set_base_headers()
if self.is_logged_in:
headers.update({
'x-twitter-auth-type': 'OAuth2Session',
@ -106,15 +294,10 @@ def _call_api(self, path, video_id, query={}, graphql=False):
})
for first_attempt in (True, False):
if not self.is_logged_in and not self._guest_token:
headers.pop('x-guest-token', None)
self._guest_token = traverse_obj(self._download_json(
f'{self._API_BASE}guest/activate.json', video_id,
'Downloading guest token', data=b'', headers=headers), 'guest_token')
if self._guest_token:
if not self.is_logged_in:
if not self._guest_token:
self._fetch_guest_token(headers, video_id)
headers['x-guest-token'] = self._guest_token
elif not self.is_logged_in:
raise ExtractorError('Could not retrieve guest token')
allowed_status = {400, 401, 403, 404} if graphql else {403}
result = self._download_json(

View file

@ -112,18 +112,19 @@ def parse_lang_code(code):
lang = ISO639Utils.short2long(lang)
return lang or None
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
if (k in ('sd', 'hd') or not isinstance(v, dict)):
continue
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
if not sttl_url:
continue
lang = parse_lang_code(lang)
if not lang:
continue
sttl = subtitles.get(lang) or []
sttl.append({'ext': k, 'url': sttl_url, })
subtitles[lang] = sttl
for stream in urplayer_data['streamingInfo'].values():
for k, v in stream.items():
if (k in ('sd', 'hd') or not isinstance(v, dict)):
continue
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
if not sttl_url:
continue
lang = parse_lang_code(lang)
if not lang:
continue
sttl = subtitles.get(lang) or []
sttl.append({'ext': k, 'url': sttl_url, })
subtitles[lang] = sttl
image = urplayer_data.get('image') or {}
thumbnails = []

View file

@ -39,7 +39,7 @@ def is_logged_in():
login_post, login_post_urlh = self._download_webpage_handle(
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])
if login_post_urlh.status == 401:
if login_post_urlh.getcode() == 401:
if get_element_by_class('onboarding-content-register-popup__title', login_post):
raise ExtractorError(
'Unable to log in: The provided email has not registered yet.', expected=True)

View file

@ -1,14 +1,86 @@
import json
import time
import urllib.error
import uuid
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
traverse_obj,
try_call,
try_get,
unified_timestamp,
unified_strdate,
)
class VootIE(InfoExtractor):
# Shared base for Voot extractors: handles login (email/password, or a raw
# JWT passed as "-u token -p <auth_token>") and access-token expiry checks.
class VootBaseIE(InfoExtractor):
_NETRC_MACHINE = 'voot'
_GEO_BYPASS = False
_LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.'
# Class-level cache: the token is shared across all Voot extractor instances
_TOKEN = None
_EXPIRY = 0
_API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'}
def _perform_login(self, username, password):
if self._TOKEN and self._EXPIRY:
return
# Token login: password is a JWT; its 'exp' claim gives the expiry
if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
VootBaseIE._TOKEN = password
VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp']
self.report_login()
# Mobile number as username is not supported
elif not username.isdigit():
# First verify the email exists (the endpoint answers 403 for
# unknown users, hence expected_status=403)
check_username = self._download_json(
'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({
'type': 'email',
'email': username
}, separators=(',', ':')).encode(), headers={
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
}, note='Checking username', expected_status=403)
if not traverse_obj(check_username, ('isExist', {bool})):
# status code 9999 marks a geo-block rather than a bad username
if traverse_obj(check_username, ('status', 'code', {int})) == 9999:
self.raise_geo_restricted(countries=['IN'])
raise ExtractorError('Incorrect username', expected=True)
auth_token = traverse_obj(self._download_json(
'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({
'type': 'traditional',
'deviceId': str(uuid.uuid4()),
'deviceBrand': 'PC/MAC',
'data': {
'email': username,
'password': password
}
}, separators=(',', ':')).encode(), headers={
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
}, note='Logging in', expected_status=400), ('data', 'authToken', {dict}))
if not auth_token:
raise ExtractorError('Incorrect password', expected=True)
VootBaseIE._TOKEN = auth_token['accessToken']
VootBaseIE._EXPIRY = auth_token['expirationTime']
else:
raise ExtractorError(self._LOGIN_HINT, expected=True)
# Raise a clear error when the cached access token has already expired
def _check_token_expiry(self):
if int(time.time()) >= self._EXPIRY:
raise ExtractorError('Access token has expired', expected=True)
def _real_initialize(self):
if not self._TOKEN:
self.raise_login_required(self._LOGIN_HINT, method=None)
self._check_token_expiry()
class VootIE(VootBaseIE):
_VALID_URL = r'''(?x)
(?:
voot:|
@ -20,27 +92,25 @@ class VootIE(InfoExtractor):
)
(?P<id>\d{3,})
'''
_GEO_COUNTRIES = ['IN']
_TESTS = [{
'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353',
'info_dict': {
'id': '0_8ledb18o',
'id': '441353',
'ext': 'mp4',
'title': 'Ishq Ka Rang Safed - Season 01 - Episode 340',
'title': 'Is this the end of Kamini?',
'description': 'md5:06291fbbbc4dcbe21235c40c262507c1',
'timestamp': 1472162937,
'timestamp': 1472103000,
'upload_date': '20160825',
'series': 'Ishq Ka Rang Safed',
'season_number': 1,
'episode': 'Is this the end of Kamini?',
'episode_number': 340,
'view_count': int,
'like_count': int,
'release_date': '20160825',
'season': 'Season 1',
'age_limit': 13,
'duration': 1146.0,
},
'params': {
'skip_download': True,
},
'expected_warnings': ['Failed to download m3u8 information'],
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925',
'only_matching': True,
@ -55,59 +125,50 @@ class VootIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
media_info = self._download_json(
'https://wapi.voot.com/ws/ott/getMediaInfo.json', video_id,
query={
'platform': 'Web',
'pId': 2,
'mediaId': video_id,
})
'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id,
query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN})
status_code = try_get(media_info, lambda x: x['status']['code'], int)
if status_code != 0:
raise ExtractorError(media_info['status']['message'], expected=True)
try:
m3u8_url = self._download_json(
'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id,
'Downloading playback JSON', data=b'{}', headers={
**self.geo_verification_headers(),
**self._API_HEADERS,
'Content-Type': 'application/json;charset=utf-8',
'platform': 'androidwebdesktop',
'vootid': video_id,
'voottoken': self._TOKEN,
})['m3u8']
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 400:
self._check_token_expiry()
raise
media = media_info['assets']
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
self._remove_duplicate_formats(formats)
entry_id = media['EntryId']
title = media['MediaName']
formats = self._extract_m3u8_formats(
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
video_id, 'mp4', m3u8_id='hls')
description, series, season_number, episode, episode_number = [None] * 5
for meta in try_get(media, lambda x: x['Metas'], list) or []:
key, value = meta.get('Key'), meta.get('Value')
if not key or not value:
continue
if key == 'ContentSynopsis':
description = value
elif key == 'RefSeriesTitle':
series = value
elif key == 'RefSeriesSeason':
season_number = int_or_none(value)
elif key == 'EpisodeMainTitle':
episode = value
elif key == 'EpisodeNo':
episode_number = int_or_none(value)
return {
'extractor_key': 'Kaltura',
'id': entry_id,
'title': title,
'description': description,
'series': series,
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
'timestamp': unified_timestamp(media.get('CreationDate')),
'duration': int_or_none(media.get('Duration')),
'view_count': int_or_none(media.get('ViewCounter')),
'like_count': int_or_none(media.get('like_counter')),
'formats': formats,
'id': video_id,
# '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
'formats': traverse_obj(formats, (
lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
'http_headers': self._API_HEADERS,
**traverse_obj(media_info, ('result', 0, {
'title': ('fullTitle', {str}),
'description': ('fullSynopsis', {str}),
'series': ('showName', {str}),
'season_number': ('season', {int_or_none}),
'episode': ('fullTitle', {str}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('uploadTime', {int_or_none}),
'release_date': ('telecastDate', {unified_strdate}),
'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('duration', {float_or_none}),
})),
}
class VootSeriesIE(InfoExtractor):
class VootSeriesIE(VootBaseIE):
_VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})'
_TESTS = [{
'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002',

View file

@ -41,7 +41,7 @@ def _TOKEN(self):
token = try_call(lambda: self._get_cookies('https://www.wrestle-universe.com/')['token'].value)
if not token and not self._REFRESH_TOKEN:
self.raise_login_required()
self._REAL_TOKEN = token
self._TOKEN = token
if not self._REAL_TOKEN or self._TOKEN_EXPIRY <= int(time.time()):
if not self._REFRESH_TOKEN:

View file

@ -158,7 +158,7 @@ def _fetch_page(self, playlist_id, page_idx):
return self._download_json(
'https://www.ximalaya.com/revision/album/v1/getTracksList',
playlist_id, note=f'Downloading tracks list page {page_idx}',
query={'albumId': playlist_id, 'pageNum': page_idx, 'sort': 1})['data']
query={'albumId': playlist_id, 'pageNum': page_idx})['data']
def _get_entries(self, page_data):
for e in page_data['tracks']:

View file

@ -1,9 +1,10 @@
from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
int_or_none,
traverse_obj,
unified_timestamp,
url_or_none
url_or_none,
)
@ -97,3 +98,30 @@ def _real_extract(self, url):
'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None,
'repost_count': int_or_none(media_data.get('sharingCount'))
}
class YappyProfileIE(InfoExtractor):
    """Extract all videos of a Yappy profile as a lazily-paged playlist."""
    _VALID_URL = r'https?://yappy\.media/profile/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://yappy.media/profile/59a0c8c485e5410b9c43474bf4c6a373',
        'info_dict': {
            'id': '59a0c8c485e5410b9c43474bf4c6a373',
        },
        'playlist_mincount': 527,
    }]

    def _real_extract(self, url):
        profile_id = self._match_id(url)

        def fetch_page(page_idx):
            # OnDemandPagedList is 0-based while the API expects 1-based pages
            page_num = page_idx + 1
            listing = self._download_json(
                f'https://yappy.media/api/video/list/{profile_id}?page={page_num}',
                profile_id, f'Downloading profile page {page_num} JSON')
            # Only entries that actually carry a uuid are playable results
            for entry in traverse_obj(listing, ('results', lambda _, v: v['uuid'])):
                yield self.url_result(
                    f'https://yappy.media/video/{entry["uuid"]}', YappyIE,
                    entry['uuid'], entry.get('description'))

        # 15 videos per API page
        return self.playlist_result(OnDemandPagedList(fetch_page, 15), profile_id)

View file

@ -258,7 +258,7 @@ def build_innertube_clients():
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@ -292,6 +292,7 @@ class BadgeType(enum.Enum):
AVAILABILITY_PREMIUM = enum.auto()
AVAILABILITY_SUBSCRIPTION = enum.auto()
LIVE_NOW = enum.auto()
VERIFIED = enum.auto()
class YoutubeBaseInfoExtractor(InfoExtractor):
@ -791,17 +792,26 @@ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
def _extract_and_report_alerts(self, data, *args, **kwargs):
return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
def _extract_badges(self, renderer: dict):
privacy_icon_map = {
def _extract_badges(self, badge_list: list):
"""
Extract known BadgeType's from a list of badge renderers.
@returns [{'type': BadgeType}]
"""
icon_type_map = {
'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
'CHECK': BadgeType.VERIFIED,
}
badge_style_map = {
'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
}
label_map = {
@ -809,13 +819,15 @@ def _extract_badges(self, renderer: dict):
'private': BadgeType.AVAILABILITY_PRIVATE,
'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
'live': BadgeType.LIVE_NOW,
'premium': BadgeType.AVAILABILITY_PREMIUM
'premium': BadgeType.AVAILABILITY_PREMIUM,
'verified': BadgeType.VERIFIED,
'official artist channel': BadgeType.VERIFIED,
}
badges = []
for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
badge_type = (
privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
or badge_style_map.get(traverse_obj(badge, 'style'))
)
if badge_type:
@ -823,11 +835,12 @@ def _extract_badges(self, renderer: dict):
continue
# fallback, won't work in some languages
label = traverse_obj(badge, 'label', expected_type=str, default='')
label = traverse_obj(
badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
for match, label_badge_type in label_map.items():
if match in label.lower():
badges.append({'type': badge_type})
continue
badges.append({'type': label_badge_type})
break
return badges
@ -1020,8 +1033,8 @@ def _extract_video(self, renderer):
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
badges = self._extract_badges(traverse_obj(renderer, 'badges'))
owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
expected_type=str)) or ''
@ -1079,7 +1092,8 @@ def _extract_video(self, renderer):
needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
view_count_field: view_count,
'live_status': live_status
'live_status': live_status,
'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
}
@ -1332,6 +1346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
'uploader_id': '@PhilippHagemeister',
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@ -1415,6 +1430,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'The Witcher',
'uploader_url': 'https://www.youtube.com/@thewitcher',
'uploader_id': '@thewitcher',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
},
{
@ -1444,6 +1462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
},
},
{
@ -1577,6 +1596,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Olympics',
'uploader_url': 'https://www.youtube.com/@Olympics',
'uploader_id': '@Olympics',
'channel_is_verified': True,
},
'params': {
'skip_download': 'requires avconv',
@ -1894,6 +1914,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Bernie Sanders',
'uploader_url': 'https://www.youtube.com/@BernieSanders',
'uploader_id': '@BernieSanders',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@ -1955,6 +1977,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Vsauce',
'uploader_url': 'https://www.youtube.com/@Vsauce',
'uploader_id': '@Vsauce',
'comment_count': int,
'channel_is_verified': True,
},
'params': {
'skip_download': True,
@ -2147,6 +2171,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'kudvenkat',
'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
'uploader_id': '@Csharp-video-tutorialsBlogspot',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@ -2227,6 +2253,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'CBS Mornings',
'uploader_url': 'https://www.youtube.com/@CBSMornings',
'uploader_id': '@CBSMornings',
'comment_count': int,
'channel_is_verified': True,
}
},
{
@ -2297,6 +2325,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'colinfurze',
'uploader_url': 'https://www.youtube.com/@colinfurze',
'uploader_id': '@colinfurze',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'format': '17', # 3gp format available on android
@ -2342,6 +2373,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'SciShow',
'uploader_url': 'https://www.youtube.com/@SciShow',
'uploader_id': '@SciShow',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
}, 'params': {'format': 'mhtml', 'skip_download': True}
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@ -2370,6 +2404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
}
}, {
# Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
@ -2398,6 +2433,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Leon Nguyen',
'uploader_url': 'https://www.youtube.com/@LeonNguyen',
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
},
'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
}, {
@ -2428,6 +2464,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Quackity',
'uploader_id': '@Quackity',
'uploader_url': 'https://www.youtube.com/@Quackity',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
}
},
{ # continuous livestream. Microformat upload date should be preferred.
@ -2594,6 +2633,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'MrBeast',
'uploader_url': 'https://www.youtube.com/@MrBeast',
'uploader_id': '@MrBeast',
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {
@ -2655,6 +2697,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'さなちゃんねる',
'uploader_url': 'https://www.youtube.com/@sana_natori',
'uploader_id': '@sana_natori',
'channel_is_verified': True,
'heatmap': 'count:100',
},
},
{
@ -2684,6 +2728,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'thumbnail': r're:^https?://.*\.webp',
'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
'playable_in_embed': True,
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
@ -2720,6 +2767,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Christopher Sykes',
'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
},
'params': {
'skip_download': True,
@ -3121,7 +3169,7 @@ def _extract_n_function_name(self, jscode):
return funcname
return json.loads(js_to_json(self._search_regex(
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])[,;]', jscode,
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
@ -3337,14 +3385,13 @@ def _extract_comment(self, comment_renderer, parent=None):
info['author_is_uploader'] = author_is_uploader
comment_abr = traverse_obj(
comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
if comment_abr is not None:
info['is_favorited'] = 'creatorHeart' in comment_abr
comment_ab_icontype = traverse_obj(
comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
if comment_ab_icontype is not None:
info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')
badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
if self._has_badge(badges, BadgeType.VERIFIED):
info['author_is_verified'] = True
is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
if is_pinned:
@ -3581,7 +3628,7 @@ def _is_agegated(player_response):
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
_STORY_PLAYER_PARAMS = '8AEB'
_PLAYER_PARAMS = 'CgIQBg=='
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
@ -3595,7 +3642,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
'videoId': video_id,
}
if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
yt_query['params'] = self._STORY_PLAYER_PARAMS
yt_query['params'] = self._PLAYER_PARAMS
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
@ -3607,7 +3654,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
default = ['android', 'web']
default = ['ios', 'android', 'web']
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@ -3830,6 +3877,8 @@ def build_fragments(f):
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@ -3837,16 +3886,16 @@ def build_fragments(f):
'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'),
language_preference > 0 and ' (default)', delim=''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
fmt.get('isDrc') and 'DRC',
name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED',
(self.get_param('verbose') or all_formats) and client_name,
delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+ (100 if 'Premium' in name else 0)),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
@ -3915,11 +3964,17 @@ def process_manifest_format(f, proto, client_name, itag):
elif itag:
f['format_id'] = itag
if itag in ('616', '235'):
f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
f['source_preference'] = (f.get('source_preference') or -1) + 100
f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
if f['quality'] == -1 and f.get('height'):
f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
if self.get_param('verbose'):
if self.get_param('verbose') or all_formats:
f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
if f.get('fps') and f['fps'] <= 1:
del f['fps']
return True
subtitles = {}
@ -3992,8 +4047,8 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
webpage = None
if 'webpage' not in self._configuration_arg('player_skip'):
query = {'bpctr': '9999999999', 'has_verified': '1'}
if smuggled_data.get('is_story'):
query['pp'] = self._STORY_PLAYER_PARAMS
if smuggled_data.get('is_story'): # XXX: Deprecated
query['pp'] = self._PLAYER_PARAMS
webpage = self._download_webpage(
webpage_url, video_id, fatal=False, query=query)
@ -4297,9 +4352,13 @@ def process_language(container, base_url, lang_code, sub_name, query):
continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, None, ' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
# Set audio language based on original subtitles
for f in formats:
if f.get('acodec') != 'none' and not f.get('language'):
f['language'] = orig_trans_code
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles.
@ -4319,15 +4378,21 @@ def process_language(container, base_url, lang_code, sub_name, query):
info[d_k] = parse_duration(query[k][0])
# Youtube Music Auto-generated description
if video_description:
if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
# XXX: Causes catastrophic backtracking if description has "·"
# E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
# Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
# reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
mobj = re.search(
r'''(?xs)
(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
(?P<album>[^\n]+)
(?=(?P<track>[^\n·]+))(?P=track)·
(?=(?P<artist>[^\n]+))(?P=artist)\n+
(?=(?P<album>[^\n]+))(?P=album)\n
(?:.+?\s*(?P<release_year>\d{4})(?!\d))?
(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
.+\nAuto-generated\ by\ YouTube\.\s*$
(.+?\nArtist\s*:\s*
(?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
)?.+\nAuto-generated\ by\ YouTube\.\s*$
''', video_description)
if mobj:
release_year = mobj.group('release_year')
@ -4488,6 +4553,9 @@ def process_language(container, base_url, lang_code, sub_name, query):
info['artist'] = mrr_contents_text
elif mrr_title == 'Song':
info['track'] = mrr_contents_text
owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
if self._has_badge(owner_badges, BadgeType.VERIFIED):
info['channel_is_verified'] = True
info.update({
'uploader': info.get('channel'),
@ -4505,7 +4573,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
):
upload_date = strftime_or_none(
self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
info['upload_date'] = upload_date
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
@ -4513,7 +4581,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
if v:
info[d_k] = v
badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
badges = self._extract_badges(traverse_obj(vpir, 'badges'))
is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
or get_first(video_details, 'isPrivate', expected_type=bool))
@ -4586,13 +4654,14 @@ def _extract_channel_renderer(self, renderer):
channel_id = self.ucid_or_none(renderer['channelId'])
title = self._get_text(renderer, 'title')
channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
# As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
# However we can expect them to change that in the future.
channel_handle = self.handle_from_url(
traverse_obj(renderer, (
'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
('browseEndpoint', 'canonicalBaseUrl')),
{str}), get_all=False))
if not channel_handle:
# As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
return {
'_type': 'url',
'url': channel_url,
@ -4605,10 +4674,18 @@ def _extract_channel_renderer(self, renderer):
'title': title,
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
# See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
# However, in feed/channels this is set correctly to the subscriber count
'channel_follower_count': traverse_obj(
renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
'playlist_count': self._get_count(renderer, 'videoCountText'),
'playlist_count': (
# videoCountText may be the subscriber count
self._get_count(renderer, 'videoCountText')
if self._get_count(renderer, 'subscriberCountText') is not None else None),
'description': self._get_text(renderer, 'descriptionSnippet'),
'channel_is_verified': True if self._has_badge(
self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
}
def _grid_entries(self, grid_renderer):
@ -5024,6 +5101,10 @@ def _get_uncropped(url):
'uploader_id': channel_handle,
'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
})
channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
if self._has_badge(channel_badges, BadgeType.VERIFIED):
info['channel_is_verified'] = True
# Playlist stats is a text runs array containing [video count, view count, last updated].
# last updated or (view count and last updated) may be missing.
playlist_stats = get_first(
@ -5032,7 +5113,7 @@ def _get_uncropped(url):
last_updated_unix = self._parse_time_text(
self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
info['modified_date'] = strftime_or_none(last_updated_unix)
info['view_count'] = self._get_count(playlist_stats, 1)
if info['view_count'] is None: # 0 is allowed
@ -5132,7 +5213,7 @@ def _extract_availability(self, data):
playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
player_header_privacy = playlist_header_renderer.get('privacy')
badges = self._extract_badges(sidebar_renderer)
badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_setting_icon = get_first(
@ -5382,7 +5463,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader': '3Blue1Brown',
'tags': ['Mathematics'],
'channel_follower_count': int
'channel_follower_count': int,
'channel_is_verified': True,
},
}, {
'note': 'playlists, singlepage',
@ -5559,6 +5641,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
},
}, {
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@ -5722,7 +5805,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': {
'id': 'AlTsmyW4auo', # This will keep changing
'id': 'hGkQjiJLjWQ', # This will keep changing
'ext': 'mp4',
'title': str,
'upload_date': r're:\d{8}',
@ -5746,6 +5829,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@SkyNews',
'uploader_id': '@SkyNews',
'uploader': 'Sky News',
'channel_is_verified': True,
},
'params': {
'skip_download': True,
@ -6234,7 +6318,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': str,
'uploader': str,
'uploader_url': str,
'uploader_id': str
'uploader_id': str,
'channel_is_verified': bool, # this will keep changing
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
@ -6270,6 +6355,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/@PewDiePie',
'uploader_id': '@PewDiePie',
'channel_is_verified': True,
}
}],
'params': {'extract_flat': True},
@ -6288,6 +6374,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown',
'channel_is_verified': True,
},
'playlist_count': 0,
}, {
@ -6322,6 +6409,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'description': 'I make music',
'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
'channel_follower_count': int,
'channel_is_verified': True,
},
'playlist_mincount': 10,
}]
@ -6897,12 +6985,15 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'playlist_count': int, # XXX: should have a way of saying > 1
# No longer available for search as it is set to the handle.
# 'playlist_count': int,
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list,
'uploader_id': '@kurzgesagt',
'uploader_url': 'https://www.youtube.com/@kurzgesagt',
'uploader': 'Kurzgesagt In a Nutshell',
'channel_is_verified': True,
'channel_follower_count': int,
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
@ -7166,6 +7257,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'live_status': 'not_live',
'channel_follower_count': int,
'chapters': 'count:20',
'comment_count': int,
'heatmap': 'count:100',
}
}]
@ -7226,6 +7319,8 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
'channel': 'さなちゃんねる',
'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
'uploader': 'さなちゃんねる',
'channel_is_verified': True,
'heatmap': 'count:100',
},
'add_ie': ['Youtube'],
'params': {'skip_download': 'Youtube'},

130
yt_dlp/extractor/zaiko.py Normal file
View file

@ -0,0 +1,130 @@
import base64
from .common import InfoExtractor
from ..utils import (
ExtractorError,
extract_attributes,
int_or_none,
str_or_none,
traverse_obj,
try_call,
unescapeHTML,
url_or_none,
)
class ZaikoBaseIE(InfoExtractor):
    def _download_real_webpage(self, url, video_id):
        """Fetch the event page, erroring out if redirected to login or the purchase flow."""
        webpage, urlh = self._download_webpage_handle(url, video_id)
        landing_url = urlh.geturl()
        if 'zaiko.io/login' in landing_url:
            self.raise_login_required()
        if '/_buy/' in landing_url:
            raise ExtractorError('Your account does not have tickets to this event', expected=True)
        return webpage

    def _parse_vue_element_attr(self, name, string, video_id):
        """Return the Vue-bound (":"-prefixed) attributes of the <name ...> tag, JSON-parsed."""
        tag = self._search_regex(rf'(<{name}[^>]+>)', string, name)
        return {
            key[1:]: self._parse_json(value, video_id, transform_source=unescapeHTML, fatal=False)
            for key, value in extract_attributes(tag).items()
            if key.startswith(':')
        }
class ZaikoIE(ZaikoBaseIE):
    """Extractor for Zaiko event streams (live and VOD)."""
    _VALID_URL = r'https?://(?:[\w-]+\.)?zaiko\.io/event/(?P<id>\d+)/stream(?:/\d+)+'
    _TESTS = [{
        'url': 'https://zaiko.io/event/324868/stream/20571/20571',
        'info_dict': {
            'id': '324868',
            'ext': 'mp4',
            'title': 'ZAIKO STREAMING TEST',
            'alt_title': '[VOD] ZAIKO STREAMING TEST_20210603(Do Not Delete)',
            'uploader_id': '454',
            'uploader': 'ZAIKO ZERO',
            'release_timestamp': 1583809200,
            'thumbnail': r're:https://[a-z0-9]+.cloudfront.net/[a-z0-9_]+/[a-z0-9_]+',
            'release_date': '20200310',
            'categories': ['Tech House'],
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_real_webpage(url, video_id)
        stream_meta = self._parse_vue_element_attr('stream-page', webpage, video_id)

        # The actual player lives on a separate host and requires a zaiko.io referer
        player_page = self._download_webpage(
            stream_meta['stream-access']['video_source'], video_id,
            'Downloading player page', headers={'referer': 'https://zaiko.io/'})
        player_meta = self._parse_vue_element_attr('player', player_page, video_id)
        status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
        # Map the reported event status to (live_status, no-formats message, expected-error flag)
        live_status, msg, expected = {
            'vod': ('was_live', 'No VOD stream URL was found', False),
            'archiving': ('post_live', 'Event VOD is still being processed', True),
            'deleting': ('post_live', 'This event has ended', True),
            'deleted': ('post_live', 'This event has ended', True),
            'error': ('post_live', 'This event has ended', True),
            'disconnected': ('post_live', 'Stream has been disconnected', True),
            'live_to_disconnected': ('post_live', 'Stream has been disconnected', True),
            # Fixed duplicated word in the message ("URL found was found")
            'live': ('is_live', 'No livestream URL was found', False),
            'waiting': ('is_upcoming', 'Live event has not yet started', True),
            'cancelled': ('not_live', 'Event has been cancelled', True),
        }.get(status) or ('not_live', f'Unknown event status "{status}"', False)

        stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
        formats = self._extract_m3u8_formats(
            stream_url, video_id, live=True, fatal=False) if stream_url else []
        if not formats:
            self.raise_no_formats(msg, expected=expected)

        return {
            'id': video_id,
            'formats': formats,
            'live_status': live_status,
            **traverse_obj(stream_meta, {
                'title': ('event', 'name', {str}),
                'uploader': ('profile', 'name', {str}),
                'uploader_id': ('profile', 'id', {str_or_none}),
                'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
                # Drop falsy genre entries (empty strings) from the categories list
                'categories': ('event', 'genres', ..., {lambda x: x or None}),
            }),
            **traverse_obj(player_meta, ('initial_event_info', {
                'alt_title': ('title', {str}),
                'thumbnail': ('poster_url', {url_or_none}),
            })),
        }
class ZaikoETicketIE(ZaikoBaseIE):
    """Extractor for a ticket-holder's e-ticket page, yielding its stream(s) as a playlist."""
    # NOTE: escaped the literal dot in the optional "www." prefix (was an unescaped ".")
    _VALID_URL = r'https?://(?:www\.)?zaiko\.io/account/eticket/(?P<id>[\w=-]{49})'
    _TESTS = [{
        'url': 'https://zaiko.io/account/eticket/TZjMwMzQ2Y2EzMXwyMDIzMDYwNzEyMTMyNXw1MDViOWU2Mw==',
        'playlist_count': 1,
        'info_dict': {
            'id': 'f30346ca31-20230607121325-505b9e63',
            'title': 'ZAIKO STREAMING TEST',
            'thumbnail': 'https://media.zkocdn.net/pf_1/1_3wdyjcjyupseatkwid34u',
        },
        'skip': 'Only available with the ticketholding account',
    }]

    def _real_extract(self, url):
        ticket_id = self._match_id(url)
        # The URL slug appears to be a one-char prefix followed by
        # urlsafe-base64("<hash>|<timestamp>|<suffix>"); decode it into a readable
        # "-"-joined id, falling back to the raw slug if decoding fails
        ticket_id = try_call(
            lambda: base64.urlsafe_b64decode(ticket_id[1:]).decode().replace('|', '-')) or ticket_id

        webpage = self._download_real_webpage(url, ticket_id)
        eticket = self._parse_vue_element_attr('eticket', webpage, ticket_id)

        return self.playlist_result(
            [self.url_result(stream, ZaikoIE) for stream in traverse_obj(eticket, ('streams', ..., 'url'))],
            ticket_id, **traverse_obj(eticket, ('ticket-details', {
                'title': 'event_name',
                'thumbnail': 'event_img_url',
            })))

View file

@ -24,7 +24,7 @@
class ZDFBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['DE']
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'uhd')
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
headers = {}
@ -61,6 +61,9 @@ def _extract_format(self, video_id, formats, format_urls, meta):
elif mime_type == 'application/f4m+xml' or ext == 'f4m':
new_formats = self._extract_f4m_formats(
update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
elif ext == 'mpd':
new_formats = self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False)
else:
f = parse_codecs(meta.get('mimeCodec'))
if not f and meta.get('type'):

View file

@ -1,14 +1,16 @@
import json
import random
import string
import time
import uuid
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
jwt_decode_hs256,
parse_age_limit,
str_or_none,
try_call,
try_get,
unified_strdate,
unified_timestamp,
@ -94,12 +96,12 @@ class Zee5IE(InfoExtractor):
'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973',
'only_matching': True
}]
_DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
_DEVICE_ID = ''.join(random.choices(string.ascii_letters + string.digits, k=20)).ljust(32, '0')
_DEVICE_ID = str(uuid.uuid4())
_USER_TOKEN = None
_LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.'
_NETRC_MACHINE = 'zee5'
_GEO_COUNTRIES = ['IN']
_USER_COUNTRY = None
def _perform_login(self, username, password):
if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None:
@ -118,11 +120,16 @@ def _perform_login(self, username, password):
self._USER_TOKEN = otp_verify_json.get('token')
if not self._USER_TOKEN:
raise ExtractorError(otp_request_json['message'], expected=True)
elif username.lower() == 'token' and len(password) > 1198:
elif username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
self._USER_TOKEN = password
else:
raise ExtractorError(self._LOGIN_HINT, expected=True)
token = jwt_decode_hs256(self._USER_TOKEN)
if token.get('exp', 0) <= int(time.time()):
raise ExtractorError('User token has expired', expected=True)
self._USER_COUNTRY = token.get('current_country')
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
access_token_request = self._download_json(
@ -137,8 +144,13 @@ def _real_extract(self, url):
data['X-Z5-Guest-Token'] = self._DEVICE_ID
json_data = self._download_json(
self._DETAIL_API_URL.format(video_id, self._DEVICE_ID),
video_id, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
'https://spapi.zee5.com/singlePlayback/getDetails/secure', video_id, query={
'content_id': video_id,
'device_id': self._DEVICE_ID,
'platform_name': 'desktop_web',
'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN',
'check_parental_control': False,
}, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8'))
asset_data = json_data['assetDetails']
show_data = json_data.get('showDetails', {})
if 'premium' in asset_data['business_type']:

View file

@ -44,7 +44,7 @@ def wrapped(a, b):
def _js_div(a, b):
if JS_Undefined in (a, b) or not (a and b):
if JS_Undefined in (a, b) or not (a or b):
return float('nan')
return (a or 0) / b if b else float('inf')
@ -779,7 +779,7 @@ def extract_object(self, objname):
obj = {}
obj_m = re.search(
r'''(?x)
(?<!this\.)%s\s*=\s*{\s*
(?<!\.)%s\s*=\s*{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
@ -812,9 +812,9 @@ def extract_function_code(self, funcname):
\((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code)
code, _ = self._separate_at_paren(func_m.group('code'))
if func_m is None:
raise self.Exception(f'Could not find JS function "{funcname}"')
code, _ = self._separate_at_paren(func_m.group('code'))
return [x.strip() for x in func_m.group('args').split(',')], code
def extract_function(self, funcname):

View file

@ -474,15 +474,15 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
callback_kwargs={
'allowed_values': {
'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'playlist-match-filter',
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
}, 'aliases': {
'youtube-dl': ['all', '-multistreams'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'],
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter'],
'2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'],
'2022': ['no-external-downloader-progress'],
'2022': ['no-external-downloader-progress', 'playlist-match-filter'],
}
}, help=(
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
@ -727,6 +727,10 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--netrc-location',
dest='netrc_location', metavar='PATH',
help='Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc')
authentication.add_option(
'--netrc-cmd',
dest='netrc_cmd', metavar='NETRC_CMD',
help='Command to execute to get the credentials for an extractor.')
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
@ -1015,8 +1019,9 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--download-sections',
metavar='REGEX', dest='download_ranges', action='append',
help=(
'Download only chapters whose title matches the given regular expression. '
'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
'Download only chapters that match the regular expression. '
'A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. '
'"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. '
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
downloader.add_option(
@ -1417,8 +1422,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'--clean-info-json', '--clean-infojson',
action='store_true', dest='clean_infojson', default=None,
help=(
'Remove some private fields such as filenames from the infojson. '
'Note that it could still contain some personal information (default)'))
'Remove some internal metadata such as filenames from the infojson (default)'))
filesystem.add_option(
'--no-clean-info-json', '--no-clean-infojson',
action='store_false', dest='clean_infojson',
@ -1681,8 +1685,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'Execute a command, optionally prefixed with when to execute it, separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). '
'Same syntax as the output template can be used to pass any field as arguments to the command. '
'After download, an additional field "filepath" that contains the final path of the downloaded file '
'is also available, and if no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. '
'This option can be used multiple times'))
postproc.add_option(
'--no-exec',

View file

@ -187,7 +187,7 @@ def report_progress(self, s):
tmpl = progress_template.get('postprocess')
if tmpl:
self._downloader.to_screen(
self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False)
self._downloader.evaluate_outtmpl(tmpl, progress_dict), quiet=False)
self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',

View file

@ -149,7 +149,7 @@ def __init__(self, ydl, target=None):
f'You are switching to an {self.ydl._format_err("unofficial", "red")} executable '
f'from {self.ydl._format_err(self._target_repo, self.ydl.Styles.EMPHASIS)}. '
f'Run {self.ydl._format_err("at your own risk", "light red")}')
self.restart = self._blocked_restart
self._block_restart('Automatically restarting into custom builds is disabled for security reasons')
else:
self._target_repo = UPDATE_SOURCES.get(self.target_channel)
if not self._target_repo:
@ -294,6 +294,7 @@ def update(self):
if (_VERSION_RE.fullmatch(self.target_tag[5:])
and version_tuple(self.target_tag[5:]) < (2023, 3, 2)):
self.ydl.report_warning('You are downgrading to a version without --update-to')
self._block_restart('Cannot automatically restart to a version without --update-to')
directory = os.path.dirname(self.filename)
if not os.access(self.filename, os.W_OK):
@ -381,11 +382,11 @@ def restart(self):
_, _, returncode = Popen.run(self.cmd)
return returncode
def _blocked_restart(self):
self._report_error(
'Automatically restarting into custom builds is disabled for security reasons. '
'Restart yt-dlp to use the updated version', expected=True)
return self.ydl._download_retcode
def _block_restart(self, msg):
def wrapper():
self._report_error(f'{msg}. Restart yt-dlp to use the updated version', expected=True)
return self.ydl._download_retcode
self.restart = wrapper
def run_update(ydl):

View file

@ -6,7 +6,7 @@
import urllib.parse
import zlib
from ._utils import decode_base_n, preferredencoding
from ._utils import Popen, decode_base_n, preferredencoding
from .traversal import traverse_obj
from ..dependencies import certifi, websockets
@ -174,3 +174,7 @@ def handle_youtubedl_headers(headers):
del filtered_headers['Youtubedl-no-compression']
return filtered_headers
def process_communicate_or_kill(p, *args, **kwargs):
return Popen.communicate_or_kill(p, *args, **kwargs)

View file

@ -25,6 +25,7 @@
import locale
import math
import mimetypes
import netrc
import operator
import os
import platform
@ -864,10 +865,11 @@ def escapeHTML(text):
)
def process_communicate_or_kill(p, *args, **kwargs):
deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
return Popen.communicate_or_kill(p, *args, **kwargs)
class netrc_from_content(netrc.netrc):
    """A netrc parser that is fed an in-memory string instead of reading a file."""

    def __init__(self, content):
        # Deliberately skip netrc.netrc.__init__ (which opens a file on disk);
        # initialize the containers it would set up and parse the string directly
        self.hosts = {}
        self.macros = {}
        buffer = io.StringIO(content)
        try:
            # "-" is a placeholder filename for error messages; False = not the default ~/.netrc
            self._parse('-', buffer, False)
        finally:
            buffer.close()
class Popen(subprocess.Popen):
@ -1654,7 +1656,7 @@ def unified_strdate(date_str, day_first=True):
def unified_timestamp(date_str, day_first=True, with_milliseconds=False):
if date_str is None:
if not isinstance(date_str, str):
return None
date_str = re.sub(r'\s+', ' ', re.sub(
@ -2446,13 +2448,16 @@ def request_to_url(req):
return req
def strftime_or_none(timestamp, date_format, default=None):
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
datetime_object = None
try:
if isinstance(timestamp, (int, float)): # unix timestamp
# Using naive datetime here can break timestamp() in Windows
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
# Also, datetime.datetime.fromtimestamp breaks for negative timestamps
# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
+ datetime.timedelta(seconds=timestamp))
elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
date_format = re.sub( # Support %s on windows
@ -3304,7 +3309,7 @@ def q(qid):
'''
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
def limit_length(s, length):
@ -3507,7 +3512,8 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
},
}
sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', ''))
sanitize_codec = functools.partial(
try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
for ext in preferences or COMPATIBLE_CODECS.keys():
@ -3753,12 +3759,10 @@ def _match_func(info_dict, incomplete=False):
class download_range_func:
def __init__(self, chapters, ranges):
self.chapters, self.ranges = chapters, ranges
def __init__(self, chapters, ranges, from_info=False):
self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
def __call__(self, info_dict, ydl):
if not self.ranges and not self.chapters:
yield {}
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
else 'Cannot match chapters since chapter information is unavailable')
@ -3770,7 +3774,23 @@ def __call__(self, info_dict, ydl):
if self.chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
for start, end in self.ranges or []:
yield {
'start_time': self._handle_negative_timestamp(start, info_dict),
'end_time': self._handle_negative_timestamp(end, info_dict),
}
if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
yield {
'start_time': info_dict.get('start_time') or 0,
'end_time': info_dict.get('end_time') or float('inf'),
}
elif not self.ranges and not self.chapters:
yield {}
@staticmethod
def _handle_negative_timestamp(time, info):
return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time
def __eq__(self, other):
return (isinstance(other, download_range_func)
@ -4152,6 +4172,7 @@ class ISO639Utils:
'or': 'ori',
'os': 'oss',
'pa': 'pan',
'pe': 'per',
'pi': 'pli',
'pl': 'pol',
'ps': 'pus',
@ -5673,6 +5694,7 @@ def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None)
return orderedSet(requested)
# TODO: Rewrite
class FormatSorter:
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
@ -5721,8 +5743,10 @@ class FormatSorter:
'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')},
'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
'function': lambda it: next(filter(None, it), None)},
'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
'function': lambda it: next(filter(None, it), None)},
'ext': {'type': 'combined', 'field': ('vext', 'aext')},
'res': {'type': 'multiple', 'field': ('height', 'width'),
'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
@ -5953,13 +5977,15 @@ def calculate_preference(self, format):
format['preference'] = -100
# Determine missing bitrates
if format.get('tbr') is None:
if format.get('vbr') is not None and format.get('abr') is not None:
format['tbr'] = format.get('vbr', 0) + format.get('abr', 0)
else:
if format.get('vcodec') != 'none' and format.get('vbr') is None:
format['vbr'] = format.get('tbr') - format.get('abr', 0)
if format.get('acodec') != 'none' and format.get('abr') is None:
format['abr'] = format.get('tbr') - format.get('vbr', 0)
if format.get('vcodec') == 'none':
format['vbr'] = 0
if format.get('acodec') == 'none':
format['abr'] = 0
if not format.get('vbr') and format.get('vcodec') != 'none':
format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
if not format.get('abr') and format.get('acodec') != 'none':
format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
return tuple(self._calculate_field_preference(format, field) for field in self._order)

View file

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2023.03.04'
__version__ = '2023.06.22'
RELEASE_GIT_HEAD = '392389b7df7b818f794b231f14dc396d4875fbad'
RELEASE_GIT_HEAD = '812cdfa06c33a40e73a8e04b3e6f42c084666a43'
VARIANT = None