mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-02 06:27:21 +00:00
Merge branch 'yt-dlp:master' into viu-indonesia-fix-6482-partial
This commit is contained in:
commit
694de75a67
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
|
@ -525,6 +525,10 @@ jobs:
|
||||||
# make sure SHA sums are also printed to stdout
|
# make sure SHA sums are also printed to stdout
|
||||||
sha256sum -- * | tee ../SHA2-256SUMS
|
sha256sum -- * | tee ../SHA2-256SUMS
|
||||||
sha512sum -- * | tee ../SHA2-512SUMS
|
sha512sum -- * | tee ../SHA2-512SUMS
|
||||||
|
# also print as permanent annotations to the summary page
|
||||||
|
while read -r shasum; do
|
||||||
|
echo "::notice title=${shasum##* }::sha256: ${shasum% *}"
|
||||||
|
done < ../SHA2-256SUMS
|
||||||
|
|
||||||
- name: Make Update spec
|
- name: Make Update spec
|
||||||
run: |
|
run: |
|
||||||
|
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -51,7 +51,6 @@ cookies
|
||||||
*.srt
|
*.srt
|
||||||
*.ssa
|
*.ssa
|
||||||
*.swf
|
*.swf
|
||||||
*.swp
|
|
||||||
*.tt
|
*.tt
|
||||||
*.ttml
|
*.ttml
|
||||||
*.url
|
*.url
|
||||||
|
@ -119,6 +118,7 @@ yt-dlp.zip
|
||||||
.vscode
|
.vscode
|
||||||
*.sublime-*
|
*.sublime-*
|
||||||
*.code-workspace
|
*.code-workspace
|
||||||
|
*.swp
|
||||||
|
|
||||||
# Lazy extractors
|
# Lazy extractors
|
||||||
*/extractor/lazy_extractors.py
|
*/extractor/lazy_extractors.py
|
||||||
|
|
24
CONTRIBUTORS
24
CONTRIBUTORS
|
@ -631,3 +631,27 @@ voidful
|
||||||
vtexier
|
vtexier
|
||||||
WyohKnott
|
WyohKnott
|
||||||
trueauracoral
|
trueauracoral
|
||||||
|
ASertacAkkaya
|
||||||
|
axpauls
|
||||||
|
chilinux
|
||||||
|
hafeoz
|
||||||
|
JSubelj
|
||||||
|
jucor
|
||||||
|
megumintyan
|
||||||
|
mgedmin
|
||||||
|
Niluge-KiWi
|
||||||
|
peisenwang
|
||||||
|
TheZ3ro
|
||||||
|
tippfehlr
|
||||||
|
varunchopra
|
||||||
|
DrakoCpp
|
||||||
|
PatrykMis
|
||||||
|
DinhHuy2010
|
||||||
|
exterrestris
|
||||||
|
harbhim
|
||||||
|
LeSuisse
|
||||||
|
DunnesH
|
||||||
|
iancmy
|
||||||
|
mokrueger
|
||||||
|
luvyana
|
||||||
|
szantnerb
|
||||||
|
|
213
Changelog.md
213
Changelog.md
|
@ -4,6 +4,219 @@ # Changelog
|
||||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||||
-->
|
-->
|
||||||
|
|
||||||
|
### 2024.08.01
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- **utils**: `unified_timestamp`: [Recognize Sunday](https://github.com/yt-dlp/yt-dlp/commit/6daf2c27c0464fba98337be30de0b66d520d0db1) ([#10589](https://github.com/yt-dlp/yt-dlp/issues/10589)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **abematv**: [Fix availability extraction](https://github.com/yt-dlp/yt-dlp/commit/ef36d517f9b05785d61abca7691d9ab7d63cc75c) ([#10569](https://github.com/yt-dlp/yt-dlp/issues/10569)) by [middlingphys](https://github.com/middlingphys)
|
||||||
|
- **cbc.ca**: player: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/94a1c5e642e468cebeb51f74c6c220434cb47d96) ([#10302](https://github.com/yt-dlp/yt-dlp/issues/10302)) by [bashonly](https://github.com/bashonly), [trainman261](https://github.com/trainman261)
|
||||||
|
- **discoveryplus**: [Support olympics URLs](https://github.com/yt-dlp/yt-dlp/commit/0b7728618417e1aa382722a4d29b916b594d4459) ([#10566](https://github.com/yt-dlp/yt-dlp/issues/10566)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **kick**: clips: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bb3936ae2b3ce96d0b53f9e17cad1082058f032b) ([#10572](https://github.com/yt-dlp/yt-dlp/issues/10572)) by [luvyana](https://github.com/luvyana)
|
||||||
|
- **learningonscreen**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/fe15d3178e242803ae7a934b90137f13598eba2e) ([#10590](https://github.com/yt-dlp/yt-dlp/issues/10590)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||||
|
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7e3e4779ad13e4511c9ba3869879e53f0267bd7a) ([#10605](https://github.com/yt-dlp/yt-dlp/issues/10605)) by [szantnerb](https://github.com/szantnerb)
|
||||||
|
- **mlbtv**: [Fix makeup game extraction](https://github.com/yt-dlp/yt-dlp/commit/4b69e1b53ea21e631cd5dd68ff531e2f1671ec17) ([#10607](https://github.com/yt-dlp/yt-dlp/issues/10607)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **olympics**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2f1ddfe12a2c174bc777264c5c8ffe7ca0922d94) ([#10604](https://github.com/yt-dlp/yt-dlp/issues/10604)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **tva**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/28d485714fef88937c82635438afba5db81f9089) ([#10567](https://github.com/yt-dlp/yt-dlp/issues/10567)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **tver**: [Support olympic URLs](https://github.com/yt-dlp/yt-dlp/commit/5260696b1cba77161828941fdb38f09f14ac6c60) ([#10600](https://github.com/yt-dlp/yt-dlp/issues/10600)) by [vvto33](https://github.com/vvto33)
|
||||||
|
- **vimeo**: review: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/2b6df93a243bdfb9d6bb5c1e18020625cd02d465) ([#10598](https://github.com/yt-dlp/yt-dlp/issues/10598)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **youtube**
|
||||||
|
- [Change default player clients to `ios,tv`](https://github.com/yt-dlp/yt-dlp/commit/efb42763dec23ccf6a2e3bac3afbfefce8efd012) ([#10457](https://github.com/yt-dlp/yt-dlp/issues/10457)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- [Fix `n` function name extraction for player `20dfca59`](https://github.com/yt-dlp/yt-dlp/commit/011b4a04db2a636c3ef0a0ad4e2d3ae482c9fd76) ([#10611](https://github.com/yt-dlp/yt-dlp/issues/10611)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Fix age-verification workaround](https://github.com/yt-dlp/yt-dlp/commit/d19fcb934269465fd707e68a87f735ec6983e93d) ([#10610](https://github.com/yt-dlp/yt-dlp/issues/10610)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||||
|
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/0e539617a41913c7da1edd74fb6543c10ad727b3) ([#10573](https://github.com/yt-dlp/yt-dlp/issues/10573)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **cleanup**: Miscellaneous: [ffd7781](https://github.com/yt-dlp/yt-dlp/commit/ffd7781d6588926f820b44a34b9e6e3068fb9f97) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
### 2024.07.25
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **abematv**: [Adapt key retrieval to request handler framework](https://github.com/yt-dlp/yt-dlp/commit/a3bab4752a2b3d56e5a59b4e0411bb8f695c010b) ([#10491](https://github.com/yt-dlp/yt-dlp/issues/10491)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **facebook**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1a34a802f44a1dab8f642c79c3cc810e21541d3b) ([#10531](https://github.com/yt-dlp/yt-dlp/issues/10531)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **mlbtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f0993391e6052ec8f7aacc286609564f226943b9) ([#10515](https://github.com/yt-dlp/yt-dlp/issues/10515)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **tiktok**: [Fix and deprioritize JSON subtitles](https://github.com/yt-dlp/yt-dlp/commit/2f97779f335ac069ecccd9c7bf81abf4a83cfe7a) ([#10516](https://github.com/yt-dlp/yt-dlp/issues/10516)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **vimeo**: [Fix chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a0a1bc3d8d8e3bb9a48a06e835815a0460e90e77) ([#10544](https://github.com/yt-dlp/yt-dlp/issues/10544)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **youtube**: [Fix `n` function name extraction for player `3400486c`](https://github.com/yt-dlp/yt-dlp/commit/713b4cd18f00556771af8cfdd9cea6cc1a09e948) ([#10542](https://github.com/yt-dlp/yt-dlp/issues/10542)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **build**: [Pin `setuptools` version](https://github.com/yt-dlp/yt-dlp/commit/e046db8a116b1c320d4785daadd48ea0b22a3987) ([#10493](https://github.com/yt-dlp/yt-dlp/issues/10493)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
### 2024.07.16
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- [Fix `noprogress` if `test=True` with `--quiet` and `--verbose`](https://github.com/yt-dlp/yt-dlp/commit/66ce3d76d87af3f81cc9dfec4be4704016cb1cdb) ([#10454](https://github.com/yt-dlp/yt-dlp/issues/10454)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
- [Support `auto-tty` and `no_color-tty` for `--color`](https://github.com/yt-dlp/yt-dlp/commit/d9cbced493cae2008508d94a2db5dd98be7c01fc) ([#10453](https://github.com/yt-dlp/yt-dlp/issues/10453)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
- **update**: [Fix network error handling](https://github.com/yt-dlp/yt-dlp/commit/ed1b9ed93dd90d2cc960c0d8eaa9d919db224203) ([#10486](https://github.com/yt-dlp/yt-dlp/issues/10486)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **utils**: `parse_codecs`: [Fix parsing of mixed case codec strings](https://github.com/yt-dlp/yt-dlp/commit/cc0070f6496e501d77352bad475fb02d6a86846a) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **adn**: [Adjust for .com domain change](https://github.com/yt-dlp/yt-dlp/commit/959b7a379b8e5da059d110a63339c964b6265736) ([#10399](https://github.com/yt-dlp/yt-dlp/issues/10399)) by [infanf](https://github.com/infanf)
|
||||||
|
- **afreecatv**: [Fix login and use `legacy_ssl`](https://github.com/yt-dlp/yt-dlp/commit/4cd41469243624d90b7a2009b95cbe0609343efe) ([#10440](https://github.com/yt-dlp/yt-dlp/issues/10440)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **box**: [Support enterprise URLs](https://github.com/yt-dlp/yt-dlp/commit/705f5b84dec75cc7af97f42fd1530e8062735970) ([#10419](https://github.com/yt-dlp/yt-dlp/issues/10419)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- **digitalconcerthall**: [Extract HEVC and FLAC formats](https://github.com/yt-dlp/yt-dlp/commit/e62fa6b0e0186f8c5666c2c5ab64cf191abdafc1) ([#10470](https://github.com/yt-dlp/yt-dlp/issues/10470)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **dplay**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/39e6c4cb44b9292e89ac0afec3cd0afc2ae8775f) ([#10471](https://github.com/yt-dlp/yt-dlp/issues/10471)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **epidemicsound**: [Support sound effects URLs](https://github.com/yt-dlp/yt-dlp/commit/8531d2b03bac9cc746f2ee8098aaf8f115505f5b) ([#10436](https://github.com/yt-dlp/yt-dlp/issues/10436)) by [iancmy](https://github.com/iancmy)
|
||||||
|
- **generic**: [Fix direct video link extensions](https://github.com/yt-dlp/yt-dlp/commit/b9afb99e7c34d0eb15ddc6689cd7d20eebfda68e) ([#10468](https://github.com/yt-dlp/yt-dlp/issues/10468)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **picarto**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bacd18b7df08b4995644fd12cee1f8c8e8636bc7) ([#10414](https://github.com/yt-dlp/yt-dlp/issues/10414)) by [Frankgoji](https://github.com/Frankgoji)
|
||||||
|
- **soundcloud**: permalink, user: [Extract tracks only](https://github.com/yt-dlp/yt-dlp/commit/22870b81bad97dfa6307a7add44753b2dffc76a9) ([#10463](https://github.com/yt-dlp/yt-dlp/issues/10463)) by [DunnesH](https://github.com/DunnesH)
|
||||||
|
- **tiktok**: live: [Fix room ID extraction](https://github.com/yt-dlp/yt-dlp/commit/d2189d3d36987ebeac426fd70a60a5fe86325a2b) ([#10408](https://github.com/yt-dlp/yt-dlp/issues/10408)) by [mokrueger](https://github.com/mokrueger)
|
||||||
|
- **tv5monde**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/9b95a6765a5f6325af99c4aca961587f0c426e8c) ([#10417](https://github.com/yt-dlp/yt-dlp/issues/10417)) by [bashonly](https://github.com/bashonly) (With fixes in [cc1a309](https://github.com/yt-dlp/yt-dlp/commit/cc1a3098c00995c6aebc2a16bd1050a66bad64db))
|
||||||
|
- **youtube**
|
||||||
|
- [Avoid poToken experiment player responses](https://github.com/yt-dlp/yt-dlp/commit/8b8b442cb005a8d85315f301615f83fb736b967a) ([#10456](https://github.com/yt-dlp/yt-dlp/issues/10456)) by [seproDev](https://github.com/seproDev) (With fixes in [16da8ef](https://github.com/yt-dlp/yt-dlp/commit/16da8ef9937ff76632dfef02e5062c5ba99c8ea2))
|
||||||
|
- [Invalidate nsig cache from < 2024.07.09](https://github.com/yt-dlp/yt-dlp/commit/04e17ba20a139f1b3e30ec4bafa3fba26888f0b3) ([#10401](https://github.com/yt-dlp/yt-dlp/issues/10401)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Reduce android client priority](https://github.com/yt-dlp/yt-dlp/commit/b85eef0a615a01304f88a3847309c667e09a20df) ([#10467](https://github.com/yt-dlp/yt-dlp/issues/10467)) by [seproDev](https://github.com/seproDev)
|
||||||
|
|
||||||
|
#### Networking changes
|
||||||
|
- [Add `legacy_ssl` request extension](https://github.com/yt-dlp/yt-dlp/commit/150ecc45d9cacc919550c13b04fd998ac5103a6b) ([#10448](https://github.com/yt-dlp/yt-dlp/issues/10448)) by [coletdjnz](https://github.com/coletdjnz)
|
||||||
|
- **Request Handler**: curl_cffi: [Support `curl_cffi` 0.7.X](https://github.com/yt-dlp/yt-dlp/commit/42bfca00a6b460fc053514cdd7ac6f5b5daddf0c) by [coletdjnz](https://github.com/coletdjnz)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **build**
|
||||||
|
- [Include `curl_cffi` in `yt-dlp_linux`](https://github.com/yt-dlp/yt-dlp/commit/4521f30d1479315cd5c3bf4abdad19391952df98) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Pin `curl-cffi` to 0.5.10 for Windows](https://github.com/yt-dlp/yt-dlp/commit/ac30941ae682f71eab010877c9a977736a61d3cf) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **cleanup**: Miscellaneous: [89a161e](https://github.com/yt-dlp/yt-dlp/commit/89a161e8c62569a662deda1c948664152efcb6b4) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
### 2024.07.09
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- [Do not alter default format selection when simulated](https://github.com/yt-dlp/yt-dlp/commit/0b570f2a90ce2363ba06089217514d644e7be2e0) ([#9862](https://github.com/yt-dlp/yt-dlp/issues/9862)) by [seproDev](https://github.com/seproDev)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **youtube**: [Remove broken `n` function extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/7ead7332af69422cee931aec3faa277288e9e212) ([#10396](https://github.com/yt-dlp/yt-dlp/issues/10396)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
|
||||||
|
|
||||||
|
### 2024.07.08
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- **jsinterp**: [Implement `Function.prototype` resolving for `call` and `apply`](https://github.com/yt-dlp/yt-dlp/commit/6c056ea7aeb03660281653a9668547f2548f194f) ([#10392](https://github.com/yt-dlp/yt-dlp/issues/10392)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **soundcloud**: [Fix rate-limit handling](https://github.com/yt-dlp/yt-dlp/commit/4b50b292cc98534fb8c7cdf0ae5cb85862f7ebfc) ([#10389](https://github.com/yt-dlp/yt-dlp/issues/10389)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **youtube**: [Fix JS `n` function name extraction](https://github.com/yt-dlp/yt-dlp/commit/297b0a379282a15c80d82d51f3757c961db2dae1) ([#10390](https://github.com/yt-dlp/yt-dlp/issues/10390)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||||
|
|
||||||
|
### 2024.07.07
|
||||||
|
|
||||||
|
#### Important changes
|
||||||
|
- Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)
|
||||||
|
- A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- [Address gaps in allowed extensions](https://github.com/yt-dlp/yt-dlp/commit/2469119490d7e0397ebbf5c5ae327316f955eef2) ([#10362](https://github.com/yt-dlp/yt-dlp/issues/10362)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Fix `--ignore-no-formats-error`](https://github.com/yt-dlp/yt-dlp/commit/cc767e9490056efaaa11c186b0d032e4b4969180) ([#10345](https://github.com/yt-dlp/yt-dlp/issues/10345)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **abematv**: [Extract availability](https://github.com/yt-dlp/yt-dlp/commit/2a1a1b8e67e864289ac7ba5d05ec63dbb19a639f) ([#10348](https://github.com/yt-dlp/yt-dlp/issues/10348)) by [middlingphys](https://github.com/middlingphys)
|
||||||
|
- **chzzk**: [Extract with API v3](https://github.com/yt-dlp/yt-dlp/commit/4862a29854d4044120e3f97b52199711ad04bee1) ([#10363](https://github.com/yt-dlp/yt-dlp/issues/10363)) by [hui1601](https://github.com/hui1601)
|
||||||
|
- **douyutv**: [Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/commit/6075a029dba70a89675ae1250e7cdfd91f0eba41) ([#10347](https://github.com/yt-dlp/yt-dlp/issues/10347)) by [LeSuisse](https://github.com/LeSuisse)
|
||||||
|
- **jiosaavn**: playlist: [Support featured playlists](https://github.com/yt-dlp/yt-dlp/commit/f0f867f008a1728f5f6ac1224b9e014b5d27f817) ([#10382](https://github.com/yt-dlp/yt-dlp/issues/10382)) by [harbhim](https://github.com/harbhim)
|
||||||
|
- **vidyard**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00766ece0c5c7a80781a4ff677198c5fb69d9dc0) ([#10155](https://github.com/yt-dlp/yt-dlp/issues/10155)) by [exterrestris](https://github.com/exterrestris)
|
||||||
|
- **vimeo**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/c1c9bb4adb42d0d93a2fb5d93a7de0a87b6ba884) ([#10341](https://github.com/yt-dlp/yt-dlp/issues/10341)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **vtv**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/987a1f94c24275f2b0cd82e719956687415dd732) ([#10173](https://github.com/yt-dlp/yt-dlp/issues/10173)) by [DinhHuy2010](https://github.com/DinhHuy2010)
|
||||||
|
- **yle_areena**
|
||||||
|
- [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/4cdc976bd861b5835601ae402bef543eacd88f3d) ([#10380](https://github.com/yt-dlp/yt-dlp/issues/10380)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- [Fix subtitle extraction](https://github.com/yt-dlp/yt-dlp/commit/0d174e8bed32081eb38ef7f5d1a1282ae154f517) ([#10379](https://github.com/yt-dlp/yt-dlp/issues/10379)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **cleanup**: Miscellaneous: [b337d29](https://github.com/yt-dlp/yt-dlp/commit/b337d2989ce0614651d363383f6f743d977248ef) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
### 2024.07.02
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- [Fix `--compat-opt allow-unsafe-ext`](https://github.com/yt-dlp/yt-dlp/commit/773bbb181506856ffda95496ab60c1c9603f1f71) ([#10336](https://github.com/yt-dlp/yt-dlp/issues/10336)) by [bashonly](https://github.com/bashonly), [rdamas](https://github.com/rdamas)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **banbye**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7509791385ba88cb7ec0ab17e826681f4af4b66e) ([#10332](https://github.com/yt-dlp/yt-dlp/issues/10332)) by [PatrykMis](https://github.com/PatrykMis), [seproDev](https://github.com/seproDev)
|
||||||
|
- **murrtube**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6403530e2dfe259a87afe444708c4f3024cc45b8) ([#9249](https://github.com/yt-dlp/yt-dlp/issues/9249)) by [DrakoCpp](https://github.com/DrakoCpp)
|
||||||
|
- **zaiko**: [Support JWT video URLs](https://github.com/yt-dlp/yt-dlp/commit/7799e518956387bb3c1064c9beae26eab8d5044a) ([#10130](https://github.com/yt-dlp/yt-dlp/issues/10130)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||||
|
|
||||||
|
#### Postprocessor changes
|
||||||
|
- **embedthumbnail**: [Fix embedding with mutagen](https://github.com/yt-dlp/yt-dlp/commit/d502f4c6d95b74896f40070d07229997f0850f31) ([#10337](https://github.com/yt-dlp/yt-dlp/issues/10337)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **cleanup**: Miscellaneous: [93d33cb](https://github.com/yt-dlp/yt-dlp/commit/93d33cb29af9e2e84369ac43589d50ce8e0160ef) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
### 2024.07.01
|
||||||
|
|
||||||
|
#### Important changes
|
||||||
|
- Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j)
|
||||||
|
- Unsafe extensions are now blocked from being downloaded
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- [Add `playlist_channel` and `playlist_channel_id` fields](https://github.com/yt-dlp/yt-dlp/commit/55e3e6fd21e741ec5ae3d8624de5e5ea345810eb) ([#10266](https://github.com/yt-dlp/yt-dlp/issues/10266)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Disallow unsafe extensions (CVE-2024-38519)](https://github.com/yt-dlp/yt-dlp/commit/5ce582448ececb8d9c30c8c31f58330090ced03a) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
- **cookies**: [Fix `--cookies-from-browser` DE detection on Linux](https://github.com/yt-dlp/yt-dlp/commit/a8520244b8642880e4d35925e9e49eff94d548de) ([#10237](https://github.com/yt-dlp/yt-dlp/issues/10237)) by [peisenwang](https://github.com/peisenwang)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **afreecatv**
|
||||||
|
- [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/e8352ad6599de7b5371dc39a1a1edc7890aaedb4) ([#10174](https://github.com/yt-dlp/yt-dlp/issues/10174)) by [hui1601](https://github.com/hui1601)
|
||||||
|
- catchstory: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/054a3ba7d1293f9fbe21800d62d1e5ddcbded238) ([#10235](https://github.com/yt-dlp/yt-dlp/issues/10235)) by [hui1601](https://github.com/hui1601)
|
||||||
|
- **bilibili**: [Support legacy formats](https://github.com/yt-dlp/yt-dlp/commit/1d6ab17d0752ee9cf19e3e63c7dec7b600d3f228) ([#9117](https://github.com/yt-dlp/yt-dlp/issues/9117)) by [c-basalt](https://github.com/c-basalt), [GD-Slime](https://github.com/GD-Slime)
|
||||||
|
- **bitchute**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/5b1a2aa978d0074cee278e7659f32f52ecc4ab53) ([#10301](https://github.com/yt-dlp/yt-dlp/issues/10301)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- **brightcove**: [Upgrade requests to HTTPS](https://github.com/yt-dlp/yt-dlp/commit/90c3721a322756bb7f4ca10ceb73744500bee37e) ([#10202](https://github.com/yt-dlp/yt-dlp/issues/10202)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **cloudflarestream**: [Fix `_VALID_URL` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/7aa322c02cec54eb77154a89da7e400194f0bd03) ([#10215](https://github.com/yt-dlp/yt-dlp/issues/10215)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **cloudycdn**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/b758877afa225747fba81c8a580e27583a231734) ([#10271](https://github.com/yt-dlp/yt-dlp/issues/10271)) by [Caesim404](https://github.com/Caesim404)
|
||||||
|
- **digitalconcerthall**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/2a4f2e82dbeeb0c9130883c83dac689d5260c871) ([#10152](https://github.com/yt-dlp/yt-dlp/issues/10152)) by [seproDev](https://github.com/seproDev), [tippfehlr](https://github.com/tippfehlr)
|
||||||
|
- **facebook**: reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8ca1d57ed08d00efa117820a5a82f763b20e2d1d) ([#10232](https://github.com/yt-dlp/yt-dlp/issues/10232)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **francetv**
|
||||||
|
- [Detect and raise errors for DRM](https://github.com/yt-dlp/yt-dlp/commit/3690c2f59827c79a1bbe388a7c1ae75db7477db2) ([#10165](https://github.com/yt-dlp/yt-dlp/issues/10165)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/081708d6074dfbb907e25af61ba530bba0d4b31d) ([#10177](https://github.com/yt-dlp/yt-dlp/issues/10177)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **generic**: [Add `key_query` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **graspop**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1d369b4096d79233e0ac2c93762746a64d7a69c8) ([#10268](https://github.com/yt-dlp/yt-dlp/issues/10268)) by [Niluge-KiWi](https://github.com/Niluge-KiWi)
|
||||||
|
- **jiocinema**: series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/61714f46956f61612032bba857aed7ad1387eccd) ([#10139](https://github.com/yt-dlp/yt-dlp/issues/10139)) by [varunchopra](https://github.com/varunchopra)
|
||||||
|
- **khanacademy**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4093eb1fcc29a0e2aea9adfcba479787d9ae0c0c) ([#9136](https://github.com/yt-dlp/yt-dlp/issues/9136)) by [c-basalt](https://github.com/c-basalt)
|
||||||
|
- **laracasts**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b8da8a98f897599095d4ef1644b8c5fd39921118) ([#10055](https://github.com/yt-dlp/yt-dlp/issues/10055)) by [ASertacAkkaya](https://github.com/ASertacAkkaya), [seproDev](https://github.com/seproDev)
|
||||||
|
- **matchtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f3411af12e209bc5624e1ac31271b8aabe2d3c90) ([#10190](https://github.com/yt-dlp/yt-dlp/issues/10190)) by [megumintyan](https://github.com/megumintyan)
|
||||||
|
- **mediasite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0953209a857c51648aee89d205c086b0e1dd3864) ([#10273](https://github.com/yt-dlp/yt-dlp/issues/10273)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **microsoftembed**: [Add extractors for dev materials](https://github.com/yt-dlp/yt-dlp/commit/9200bc70c94546b2191bb6fbfc9cea98a919cc56) ([#9177](https://github.com/yt-dlp/yt-dlp/issues/9177)) by [c-basalt](https://github.com/c-basalt)
|
||||||
|
- **mlbtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/61edf57f8f13f6dfd81154174e647eb5fdd26089) ([#10296](https://github.com/yt-dlp/yt-dlp/issues/10296)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **neteasemusic**: [Extract more formats from new API](https://github.com/yt-dlp/yt-dlp/commit/7a03f88c40b80d3cf54f68edd9d4bdd6aa527570) ([#10258](https://github.com/yt-dlp/yt-dlp/issues/10258)) by [hafeoz](https://github.com/hafeoz)
|
||||||
|
- **nhkradiru**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b8e2a5e0e1030076f833917906e19bb6c7b318f6) ([#10106](https://github.com/yt-dlp/yt-dlp/issues/10106)) by [garret1317](https://github.com/garret1317)
|
||||||
|
- **nuum**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/aefede25561a06cba398d4f593eee2fbe942693b) ([#10316](https://github.com/yt-dlp/yt-dlp/issues/10316)) by [DmitryScaletta](https://github.com/DmitryScaletta)
|
||||||
|
- **orf**
|
||||||
|
- on
|
||||||
|
- [Add `prefer_segments_playlist` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/e6a22834df1776ec4e486526f6df2bf53cb7e06f) ([#10314](https://github.com/yt-dlp/yt-dlp/issues/10314)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- [Support segmented episodes](https://github.com/yt-dlp/yt-dlp/commit/8b46ad4d8b8ee8c5472af0cde863baa89ca3f425) ([#10053](https://github.com/yt-dlp/yt-dlp/issues/10053)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- **patreoncampaign**: [Fix `campaign_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5a47da400b645aadbda6afd1156bd89c744f48) ([#10070](https://github.com/yt-dlp/yt-dlp/issues/10070)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **podbayfm**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d4b52ce3fcb8d9578ed12365648eaba8718c603e) ([#10195](https://github.com/yt-dlp/yt-dlp/issues/10195)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||||
|
- **pokergo**: [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/36e8dd832579b5375a0f6626af4268b86b4eb21a) ([#10319](https://github.com/yt-dlp/yt-dlp/issues/10319)) by [axpauls](https://github.com/axpauls)
|
||||||
|
- **qqmusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4f5d7be3c5590bb257d8ff521572aee9839ab754) ([#9768](https://github.com/yt-dlp/yt-dlp/issues/9768)) by [c-basalt](https://github.com/c-basalt)
|
||||||
|
- **rtvslo.si**: show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/92a1c4abaeeba9a69d611c57b73555cb1a1f00ad) ([#8418](https://github.com/yt-dlp/yt-dlp/issues/8418)) by [JSubelj](https://github.com/JSubelj), [seproDev](https://github.com/seproDev)
|
||||||
|
- **soundcloud**: [Fix `download` format extraction](https://github.com/yt-dlp/yt-dlp/commit/e53e56b73543799638fa6abb0c78f8b091aa84e1) ([#10125](https://github.com/yt-dlp/yt-dlp/issues/10125)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **sproutvideo**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/d6c2c2bc84f1434255be5c73baeb17d893d2c0d4) ([#10098](https://github.com/yt-dlp/yt-dlp/issues/10098)) by [bashonly](https://github.com/bashonly), [TheZ3ro](https://github.com/TheZ3ro)
|
||||||
|
- **tiktok**
|
||||||
|
- [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/ea88129784fcbb6987161df9ba05909325d8e2e9) ([#10124](https://github.com/yt-dlp/yt-dlp/issues/10124)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/96472d72f29550c25c5dcedcde02c38c192b0011) ([#10216](https://github.com/yt-dlp/yt-dlp/issues/10216)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **tubitv**
|
||||||
|
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/bef9a9e5361fd7a72e21d0f1a8c8afb70d89e8c5) ([#9975](https://github.com/yt-dlp/yt-dlp/issues/9975)) by [chilinux](https://github.com/chilinux)
|
||||||
|
- series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d7d861811c15585a4f7ec9d5ae68d2ac28de28a0) ([#10116](https://github.com/yt-dlp/yt-dlp/issues/10116)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **vimeo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/d4b99a233314bf31f9c842035ea9884673d5313a) ([#10327](https://github.com/yt-dlp/yt-dlp/issues/10327)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **youtube**
|
||||||
|
- [Extract all formats from multi-language m3u8s](https://github.com/yt-dlp/yt-dlp/commit/9bd85019931927a99b0fe0dc58ac51acca9fbe72) ([#9875](https://github.com/yt-dlp/yt-dlp/issues/9875)) by [bashonly](https://github.com/bashonly), [clienthax](https://github.com/clienthax)
|
||||||
|
- [Skip formats if nsig decoding fails](https://github.com/yt-dlp/yt-dlp/commit/800ec085ccf98420584d8bb38c20a2c079669b09) ([#10223](https://github.com/yt-dlp/yt-dlp/issues/10223)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Suppress "Unavailable videos are hidden" warning](https://github.com/yt-dlp/yt-dlp/commit/24f3097ea9a470a984d0454dc013cafa2325f5f8) ([#10159](https://github.com/yt-dlp/yt-dlp/issues/10159)) by [mgedmin](https://github.com/mgedmin)
|
||||||
|
- tab: [Fix channel metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/a0d9967f6822fc279e86bce33464194985148727) ([#10071](https://github.com/yt-dlp/yt-dlp/issues/10071)) by [bashonly](https://github.com/bashonly), [shoxie007](https://github.com/shoxie007)
|
||||||
|
|
||||||
|
#### Downloader changes
|
||||||
|
- **hls**: [Apply `extra_param_to_key_url` from info dict](https://github.com/yt-dlp/yt-dlp/commit/ca8885edd93bdf8912af6c22ee335b6222cb9ba9) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Postprocessor changes
|
||||||
|
- **embedthumbnail**: [Fix postprocessor](https://github.com/yt-dlp/yt-dlp/commit/f2a4ea1794718e4dc0148bc172cb877f1080903b) ([#10248](https://github.com/yt-dlp/yt-dlp/issues/10248)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
|
||||||
|
#### Networking changes
|
||||||
|
- **Request Handler**: requests: [Bump minimum `requests` version to 2.32.2](https://github.com/yt-dlp/yt-dlp/commit/db50f19d76c6870a5a13d0cab9287d684fd7449a) ([#10079](https://github.com/yt-dlp/yt-dlp/issues/10079)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **build**
|
||||||
|
- [Bump Pyinstaller to `>=6.7.0` for all builds](https://github.com/yt-dlp/yt-dlp/commit/5fdd13006a1c5d78642c8d3c4c7df0448273c2ae) ([#10069](https://github.com/yt-dlp/yt-dlp/issues/10069)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||||
|
- [Cache dependencies for `macos` job](https://github.com/yt-dlp/yt-dlp/commit/46c1b7cfec1d0e6155083ca7e6948674c64ecb97) ([#10088](https://github.com/yt-dlp/yt-dlp/issues/10088)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- [Use `macos-12` image for `yt-dlp_macos`](https://github.com/yt-dlp/yt-dlp/commit/03334d639d5282cd4107edb32c623ba400262fc4) ([#10063](https://github.com/yt-dlp/yt-dlp/issues/10063)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **cleanup**
|
||||||
|
- [Add more ruff rules](https://github.com/yt-dlp/yt-dlp/commit/add96eb9f84cfffe85682bf2fb85135746994ee8) ([#10149](https://github.com/yt-dlp/yt-dlp/issues/10149)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- [Bump ruff to 0.5.x](https://github.com/yt-dlp/yt-dlp/commit/7814c50948a2b9a4c746441ecbc509ae563d5d1f) ([#10282](https://github.com/yt-dlp/yt-dlp/issues/10282)) by [seproDev](https://github.com/seproDev)
|
||||||
|
- Miscellaneous: [6aaf96a](https://github.com/yt-dlp/yt-dlp/commit/6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt), [jucor](https://github.com/jucor), [seproDev](https://github.com/seproDev)
|
||||||
|
- **test**: download: [Raise on network errors](https://github.com/yt-dlp/yt-dlp/commit/54a63e80af82791d2f0985bd0176bb182963fd5f) ([#10283](https://github.com/yt-dlp/yt-dlp/issues/10283)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||||
|
|
||||||
### 2024.05.27
|
### 2024.05.27
|
||||||
|
|
||||||
#### Extractor changes
|
#### Extractor changes
|
||||||
|
|
|
@ -61,3 +61,10 @@ ## [Grub4K](https://github.com/Grub4K)
|
||||||
* Reworked internals like `traverse_obj`, various core refactors and bug fixes
|
* Reworked internals like `traverse_obj`, various core refactors and bug fixes
|
||||||
* Implemented proper progress reporting for parallel downloads
|
* Implemented proper progress reporting for parallel downloads
|
||||||
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc
|
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc
|
||||||
|
|
||||||
|
|
||||||
|
## [sepro](https://github.com/seproDev)
|
||||||
|
|
||||||
|
* UX improvements: Warn when ffmpeg is missing, warn when double-clicking exe
|
||||||
|
* Code cleanup: Remove dead extractors, mark extractors as broken, enable/apply ruff rules
|
||||||
|
* Improved/fixed/added ArdMediathek, DRTV, Floatplane, MagentaMusik, Naver, Nebula, OnDemandKorea, Vbox7 etc
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -21,7 +21,7 @@ clean-test:
|
||||||
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||||
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
|
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
|
||||||
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
|
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
|
||||||
*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
||||||
clean-dist:
|
clean-dist:
|
||||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
||||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||||
|
|
92
README.md
92
README.md
|
@ -141,7 +141,7 @@ ## UPDATE
|
||||||
|
|
||||||
If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
|
If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
|
||||||
|
|
||||||
For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer their documentation
|
For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation
|
||||||
|
|
||||||
<a id="update-channels"></a>
|
<a id="update-channels"></a>
|
||||||
|
|
||||||
|
@ -184,7 +184,7 @@ ## DEPENDENCIES
|
||||||
|
|
||||||
### Strongly recommended
|
### Strongly recommended
|
||||||
|
|
||||||
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
|
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection), as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
|
||||||
|
|
||||||
There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
||||||
|
|
||||||
|
@ -202,7 +202,7 @@ #### Impersonation
|
||||||
|
|
||||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
||||||
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
||||||
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
|
* Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
|
||||||
|
|
||||||
|
|
||||||
### Metadata
|
### Metadata
|
||||||
|
@ -275,7 +275,7 @@ ### Standalone Py2Exe Builds (Windows)
|
||||||
### Related scripts
|
### Related scripts
|
||||||
|
|
||||||
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
|
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
|
||||||
* **`devscripts/update-version.py`** - Update the version number based on current date.
|
* **`devscripts/update-version.py`** - Update the version number based on the current date.
|
||||||
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
|
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
|
||||||
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
|
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
|
||||||
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
|
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
|
||||||
|
@ -368,7 +368,9 @@ ## General Options:
|
||||||
stderr) to apply the setting to. Can be one
|
stderr) to apply the setting to. Can be one
|
||||||
of "always", "auto" (default), "never", or
|
of "always", "auto" (default), "never", or
|
||||||
"no_color" (use non color terminal
|
"no_color" (use non color terminal
|
||||||
sequences). Can be used multiple times
|
sequences). Use "auto-tty" or "no_color-tty"
|
||||||
|
to decide based on terminal support only.
|
||||||
|
Can be used multiple times
|
||||||
--compat-options OPTS Options that can help keep compatibility
|
--compat-options OPTS Options that can help keep compatibility
|
||||||
with youtube-dl or youtube-dlc
|
with youtube-dl or youtube-dlc
|
||||||
configurations by reverting some of the
|
configurations by reverting some of the
|
||||||
|
@ -456,8 +458,8 @@ ## Video Selection:
|
||||||
is not present, and "&" to check multiple
|
is not present, and "&" to check multiple
|
||||||
conditions. Use a "\" to escape "&" or
|
conditions. Use a "\" to escape "&" or
|
||||||
quotes if needed. If used multiple times,
|
quotes if needed. If used multiple times,
|
||||||
the filter matches if atleast one of the
|
the filter matches if at least one of the
|
||||||
conditions are met. E.g. --match-filter
|
conditions is met. E.g. --match-filter
|
||||||
!is_live --match-filter "like_count>?100 &
|
!is_live --match-filter "like_count>?100 &
|
||||||
description~='(?i)\bcats \& dogs\b'" matches
|
description~='(?i)\bcats \& dogs\b'" matches
|
||||||
only videos that are not live OR those that
|
only videos that are not live OR those that
|
||||||
|
@ -674,7 +676,7 @@ ## Filesystem Options:
|
||||||
PROFILE to load cookies from, and the
|
PROFILE to load cookies from, and the
|
||||||
CONTAINER name (if Firefox) ("none" for no
|
CONTAINER name (if Firefox) ("none" for no
|
||||||
container) can be given with their
|
container) can be given with their
|
||||||
respective seperators. By default, all
|
respective separators. By default, all
|
||||||
containers of the most recently accessed
|
containers of the most recently accessed
|
||||||
profile are used. Currently supported
|
profile are used. Currently supported
|
||||||
keyrings are: basictext, gnomekeyring,
|
keyrings are: basictext, gnomekeyring,
|
||||||
|
@ -1036,7 +1038,7 @@ ## Post-Processing Options:
|
||||||
--print/--output), "before_dl" (before each
|
--print/--output), "before_dl" (before each
|
||||||
video download), "post_process" (after each
|
video download), "post_process" (after each
|
||||||
video download; default), "after_move"
|
video download; default), "after_move"
|
||||||
(after moving video file to it's final
|
(after moving video file to its final
|
||||||
locations), "after_video" (after downloading
|
locations), "after_video" (after downloading
|
||||||
and processing all formats of a video), or
|
and processing all formats of a video), or
|
||||||
"playlist" (at end of playlist). This option
|
"playlist" (at end of playlist). This option
|
||||||
|
@ -1125,7 +1127,7 @@ # CONFIGURATION
|
||||||
* `/etc/yt-dlp/config`
|
* `/etc/yt-dlp/config`
|
||||||
* `/etc/yt-dlp/config.txt`
|
* `/etc/yt-dlp/config.txt`
|
||||||
|
|
||||||
E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
||||||
```
|
```
|
||||||
# Lines starting with # are comments
|
# Lines starting with # are comments
|
||||||
|
|
||||||
|
@ -1142,7 +1144,7 @@ # Save all videos under YouTube directory in your home directory
|
||||||
-o ~/YouTube/%(title)s.%(ext)s
|
-o ~/YouTube/%(title)s.%(ext)s
|
||||||
```
|
```
|
||||||
|
|
||||||
**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell.
|
**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.
|
||||||
|
|
||||||
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
|
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
|
||||||
|
|
||||||
|
@ -1154,12 +1156,12 @@ ### Configuration file encoding
|
||||||
|
|
||||||
### Authentication with netrc
|
### Authentication with netrc
|
||||||
|
|
||||||
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
|
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that, you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
|
||||||
```
|
```
|
||||||
touch ${HOME}/.netrc
|
touch ${HOME}/.netrc
|
||||||
chmod a-rwx,u+rw ${HOME}/.netrc
|
chmod a-rwx,u+rw ${HOME}/.netrc
|
||||||
```
|
```
|
||||||
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
|
After that, you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
|
||||||
```
|
```
|
||||||
machine <extractor> login <username> password <password>
|
machine <extractor> login <username> password <password>
|
||||||
```
|
```
|
||||||
|
@ -1201,7 +1203,7 @@ # OUTPUT TEMPLATE
|
||||||
|
|
||||||
The field names themselves (the part inside the parentheses) can also have some special formatting:
|
The field names themselves (the part inside the parentheses) can also have some special formatting:
|
||||||
|
|
||||||
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; E.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; E.g. `%(id.3:7)s`, `%(id.6:2:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||||
|
|
||||||
1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||||
|
|
||||||
|
@ -1282,13 +1284,15 @@ # OUTPUT TEMPLATE
|
||||||
- `n_entries` (numeric): Total number of extracted items in the playlist
|
- `n_entries` (numeric): Total number of extracted items in the playlist
|
||||||
- `playlist_id` (string): Identifier of the playlist that contains the video
|
- `playlist_id` (string): Identifier of the playlist that contains the video
|
||||||
- `playlist_title` (string): Name of the playlist that contains the video
|
- `playlist_title` (string): Name of the playlist that contains the video
|
||||||
- `playlist` (string): `playlist_id` or `playlist_title`
|
- `playlist` (string): `playlist_title` if available or else `playlist_id`
|
||||||
- `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
|
- `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
|
||||||
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
|
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
|
||||||
- `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
|
- `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
|
||||||
- `playlist_uploader` (string): Full name of the playlist uploader
|
- `playlist_uploader` (string): Full name of the playlist uploader
|
||||||
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
|
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
|
||||||
- `webpage_url` (string): A URL to the video webpage which if given to yt-dlp should allow to get the same result again
|
- `playlist_channel` (string): Display name of the channel that uploaded the playlist
|
||||||
|
- `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist
|
||||||
|
- `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again
|
||||||
- `webpage_url_basename` (string): The basename of the webpage URL
|
- `webpage_url_basename` (string): The basename of the webpage URL
|
||||||
- `webpage_url_domain` (string): The domain of the webpage URL
|
- `webpage_url_domain` (string): The domain of the webpage URL
|
||||||
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
|
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
|
||||||
|
@ -1304,10 +1308,10 @@ # OUTPUT TEMPLATE
|
||||||
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
- `chapter_number` (numeric): Number of the chapter the video belongs to
|
||||||
- `chapter_id` (string): Id of the chapter the video belongs to
|
- `chapter_id` (string): Id of the chapter the video belongs to
|
||||||
|
|
||||||
Available for the video that is an episode of some series or programme:
|
Available for the video that is an episode of some series or program:
|
||||||
|
|
||||||
- `series` (string): Title of the series or programme the video episode belongs to
|
- `series` (string): Title of the series or program the video episode belongs to
|
||||||
- `series_id` (string): Id of the series or programme the video episode belongs to
|
- `series_id` (string): Id of the series or program the video episode belongs to
|
||||||
- `season` (string): Title of the season the video episode belongs to
|
- `season` (string): Title of the season the video episode belongs to
|
||||||
- `season_number` (numeric): Number of the season the video episode belongs to
|
- `season_number` (numeric): Number of the season the video episode belongs to
|
||||||
- `season_id` (string): Id of the season the video episode belongs to
|
- `season_id` (string): Id of the season the video episode belongs to
|
||||||
|
@ -1364,7 +1368,7 @@ # OUTPUT TEMPLATE
|
||||||
|
|
||||||
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
|
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
|
||||||
|
|
||||||
**Note**: Some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
**Note**: Some of the sequences are not guaranteed to be present, since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
|
||||||
|
|
||||||
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
|
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
|
||||||
|
|
||||||
|
@ -1442,7 +1446,7 @@ # FORMAT SELECTION
|
||||||
|
|
||||||
- `all`: Select **all formats** separately
|
- `all`: Select **all formats** separately
|
||||||
- `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
|
- `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
|
||||||
- `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (ie; `vcodec!=none or acodec!=none`)
|
- `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e., `vcodec!=none or acodec!=none`)
|
||||||
- `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
|
- `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
|
||||||
- `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
|
- `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
|
||||||
- `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
|
- `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
|
||||||
|
@ -1455,7 +1459,7 @@ # FORMAT SELECTION
|
||||||
- `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
|
- `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
|
||||||
- `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
|
- `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
|
||||||
|
|
||||||
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.
|
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is, however, recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.
|
||||||
|
|
||||||
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
|
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
|
||||||
|
|
||||||
|
@ -1505,7 +1509,7 @@ ## Filtering Formats
|
||||||
|
|
||||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
|
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
|
||||||
|
|
||||||
**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
|
**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
|
||||||
|
|
||||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||||
|
|
||||||
|
@ -1549,9 +1553,9 @@ ## Sorting Formats
|
||||||
|
|
||||||
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the values nearest to the provided one by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the values nearest to the provided one by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
|
||||||
|
|
||||||
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
|
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
|
||||||
|
|
||||||
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
|
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
|
||||||
|
|
||||||
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
|
||||||
|
|
||||||
|
@ -1754,7 +1758,7 @@ # Replace all spaces and "_" in title and uploader with a `-`
|
||||||
|
|
||||||
# EXTRACTOR ARGUMENTS
|
# EXTRACTOR ARGUMENTS
|
||||||
|
|
||||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
|
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
|
||||||
|
|
||||||
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
|
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
|
||||||
|
|
||||||
|
@ -1763,7 +1767,7 @@ # EXTRACTOR ARGUMENTS
|
||||||
#### youtube
|
#### youtube
|
||||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,tv` is used, but `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
||||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||||
|
@ -1771,7 +1775,7 @@ #### youtube
|
||||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
||||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||||
* `innertube_key`: Innertube API key to use for all API requests
|
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
|
||||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||||
|
|
||||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||||
|
@ -1849,7 +1853,16 @@ #### afreecatvlive
|
||||||
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
|
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
|
||||||
|
|
||||||
#### soundcloud
|
#### soundcloud
|
||||||
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
|
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
|
||||||
|
|
||||||
|
#### orfon (orf:on)
|
||||||
|
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`
|
||||||
|
|
||||||
|
#### bilibili
|
||||||
|
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats
|
||||||
|
|
||||||
|
#### digitalconcerthall
|
||||||
|
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats
|
||||||
|
|
||||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||||
|
|
||||||
|
@ -1862,7 +1875,7 @@ # PLUGINS
|
||||||
|
|
||||||
Plugins can be of `<type>`s `extractor` or `postprocessor`.
|
Plugins can be of `<type>`s `extractor` or `postprocessor`.
|
||||||
- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it.
|
- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it.
|
||||||
- Extractor plugins take priority over builtin extractors.
|
- Extractor plugins take priority over built-in extractors.
|
||||||
- Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
|
- Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
|
||||||
|
|
||||||
|
|
||||||
|
@ -1917,7 +1930,7 @@ ## Developing Plugins
|
||||||
|
|
||||||
See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
|
See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
|
||||||
|
|
||||||
All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors repectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).
|
All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).
|
||||||
|
|
||||||
To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
|
To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
|
||||||
|
|
||||||
|
@ -1929,7 +1942,7 @@ # EMBEDDING YT-DLP
|
||||||
|
|
||||||
yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
|
yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
|
||||||
|
|
||||||
Your program should avoid parsing the normal stdout since they may change in future versions. Instead they should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc to create console output that you can reliably reproduce and parse.
|
Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc to create console output that you can reliably reproduce and parse.
|
||||||
|
|
||||||
From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
|
From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
|
||||||
|
|
||||||
|
@ -2211,16 +2224,25 @@ ### Differences in default behavior
|
||||||
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
|
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
|
||||||
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
|
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
|
||||||
* The sub-modules `swfinterp`, `casefold` are removed.
|
* The sub-modules `swfinterp`, `casefold` are removed.
|
||||||
|
* Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details.
|
||||||
|
|
||||||
For ease of use, a few more compat options are available:
|
For ease of use, a few more compat options are available:
|
||||||
|
|
||||||
* `--compat-options all`: Use all compat options (Do NOT use)
|
* `--compat-options all`: Use all compat options (**Do NOT use this!**)
|
||||||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
|
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext`
|
||||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
|
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext`
|
||||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||||
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
|
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
|
||||||
|
|
||||||
|
The following compat options restore vulnerable behavior from before security patches:
|
||||||
|
|
||||||
|
* `--compat-options allow-unsafe-ext`: Allow files with any extension (including unsafe ones) to be downloaded ([GHSA-79w7-vh3h-8g4j](<https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j>))
|
||||||
|
|
||||||
|
> :warning: Only use if a valid file download is rejected because its extension is detected as uncommon
|
||||||
|
>
|
||||||
|
> **This option can enable remote code execution! Consider [opening an issue](<https://github.com/yt-dlp/yt-dlp/issues/new/choose>) instead!**
|
||||||
|
|
||||||
### Deprecated options
|
### Deprecated options
|
||||||
|
|
||||||
These are all the deprecated options and the current alternative to achieve the same effect
|
These are all the deprecated options and the current alternative to achieve the same effect
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
|
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
|
||||||
python -m devscripts.install_deps --include secretstorage
|
python -m devscripts.install_deps --include secretstorage --include curl-cffi
|
||||||
python -m devscripts.make_lazy_extractors
|
python -m devscripts.make_lazy_extractors
|
||||||
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
|
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
|
||||||
python -m bundle.pyinstaller
|
python -m bundle.pyinstaller
|
||||||
|
|
|
@ -169,5 +169,21 @@
|
||||||
"when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6",
|
"when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6",
|
||||||
"short": "[cleanup] Misc (#9765)",
|
"short": "[cleanup] Misc (#9765)",
|
||||||
"authors": ["bashonly", "Grub4K", "seproDev"]
|
"authors": ["bashonly", "Grub4K", "seproDev"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "e6a22834df1776ec4e486526f6df2bf53cb7e06f",
|
||||||
|
"short": "[ie/orf:on] Add `prefer_segments_playlist` extractor-arg (#10314)",
|
||||||
|
"authors": ["seproDev"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "add",
|
||||||
|
"when": "6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733",
|
||||||
|
"short": "[priority] Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j)\n - Unsafe extensions are now blocked from being downloaded"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "add",
|
||||||
|
"when": "6075a029dba70a89675ae1250e7cdfd91f0eba41",
|
||||||
|
"short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
2
devscripts/cli_to_api.py
Normal file → Executable file
2
devscripts/cli_to_api.py
Normal file → Executable file
|
@ -1,3 +1,5 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
|
@ -9,6 +9,7 @@ maintainers = [
|
||||||
{name = "Grub4K", email = "contact@grub4k.xyz"},
|
{name = "Grub4K", email = "contact@grub4k.xyz"},
|
||||||
{name = "bashonly", email = "bashonly@protonmail.com"},
|
{name = "bashonly", email = "bashonly@protonmail.com"},
|
||||||
{name = "coletdjnz", email = "coletdjnz@protonmail.com"},
|
{name = "coletdjnz", email = "coletdjnz@protonmail.com"},
|
||||||
|
{name = "sepro", email = "sepro@sepr0.com"},
|
||||||
]
|
]
|
||||||
description = "A feature-rich command-line audio/video downloader"
|
description = "A feature-rich command-line audio/video downloader"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
@ -53,7 +54,10 @@ dependencies = [
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
default = []
|
default = []
|
||||||
curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
curl-cffi = [
|
||||||
|
"curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
|
||||||
|
"curl-cffi>=0.5.10,!=0.6.*,<0.8; os_name!='nt' and implementation_name=='cpython'",
|
||||||
|
]
|
||||||
secretstorage = [
|
secretstorage = [
|
||||||
"cffi",
|
"cffi",
|
||||||
"secretstorage",
|
"secretstorage",
|
||||||
|
@ -62,7 +66,7 @@ build = [
|
||||||
"build",
|
"build",
|
||||||
"hatchling",
|
"hatchling",
|
||||||
"pip",
|
"pip",
|
||||||
"setuptools",
|
"setuptools>=71.0.2", # 71.0.0 broke pyinstaller
|
||||||
"wheel",
|
"wheel",
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
|
@ -72,7 +76,7 @@ dev = [
|
||||||
]
|
]
|
||||||
static-analysis = [
|
static-analysis = [
|
||||||
"autopep8~=2.0",
|
"autopep8~=2.0",
|
||||||
"ruff~=0.4.4",
|
"ruff~=0.5.0",
|
||||||
]
|
]
|
||||||
test = [
|
test = [
|
||||||
"pytest~=8.1",
|
"pytest~=8.1",
|
||||||
|
@ -211,6 +215,7 @@ ignore = [
|
||||||
"TD002", # missing-todo-author
|
"TD002", # missing-todo-author
|
||||||
"TD003", # missing-todo-link
|
"TD003", # missing-todo-link
|
||||||
"PLE0604", # invalid-all-object (false positives)
|
"PLE0604", # invalid-all-object (false positives)
|
||||||
|
"PLE0643", # potential-index-error (false positives)
|
||||||
"PLW0603", # global-statement
|
"PLW0603", # global-statement
|
||||||
"PLW1510", # subprocess-run-without-check
|
"PLW1510", # subprocess-run-without-check
|
||||||
"PLW2901", # redefined-loop-name
|
"PLW2901", # redefined-loop-name
|
||||||
|
@ -298,7 +303,7 @@ banned-from = [
|
||||||
"string",
|
"string",
|
||||||
"sys",
|
"sys",
|
||||||
"time",
|
"time",
|
||||||
"urllib",
|
"urllib.parse",
|
||||||
"uuid",
|
"uuid",
|
||||||
"xml",
|
"xml",
|
||||||
]
|
]
|
||||||
|
|
|
@ -46,6 +46,7 @@ # Supported sites
|
||||||
- **aenetworks:show**
|
- **aenetworks:show**
|
||||||
- **AeonCo**
|
- **AeonCo**
|
||||||
- **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
|
- **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
|
||||||
|
- **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story
|
||||||
- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
|
- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
|
||||||
- **afreecatv:user**
|
- **afreecatv:user**
|
||||||
- **AirTV**
|
- **AirTV**
|
||||||
|
@ -353,7 +354,6 @@ # Supported sites
|
||||||
- **DigitallySpeaking**
|
- **DigitallySpeaking**
|
||||||
- **Digiteka**
|
- **Digiteka**
|
||||||
- **DiscogsReleasePlaylist**
|
- **DiscogsReleasePlaylist**
|
||||||
- **Discovery**
|
|
||||||
- **DiscoveryLife**
|
- **DiscoveryLife**
|
||||||
- **DiscoveryNetworksDe**
|
- **DiscoveryNetworksDe**
|
||||||
- **DiscoveryPlus**
|
- **DiscoveryPlus**
|
||||||
|
@ -362,7 +362,6 @@ # Supported sites
|
||||||
- **DiscoveryPlusItaly**
|
- **DiscoveryPlusItaly**
|
||||||
- **DiscoveryPlusItalyShow**
|
- **DiscoveryPlusItalyShow**
|
||||||
- **Disney**
|
- **Disney**
|
||||||
- **DIYNetwork**
|
|
||||||
- **dlf**
|
- **dlf**
|
||||||
- **dlf:corpus**: DLF Multi-feed Archives
|
- **dlf:corpus**: DLF Multi-feed Archives
|
||||||
- **dlive:stream**
|
- **dlive:stream**
|
||||||
|
@ -515,7 +514,6 @@ # Supported sites
|
||||||
- **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine")
|
- **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine")
|
||||||
- **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine")
|
- **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine")
|
||||||
- **Glide**: Glide mobile video messages (glide.me)
|
- **Glide**: Glide mobile video messages (glide.me)
|
||||||
- **GlobalCyclingNetworkPlus**
|
|
||||||
- **GlobalPlayerAudio**
|
- **GlobalPlayerAudio**
|
||||||
- **GlobalPlayerAudioEpisode**
|
- **GlobalPlayerAudioEpisode**
|
||||||
- **GlobalPlayerLive**
|
- **GlobalPlayerLive**
|
||||||
|
@ -542,6 +540,7 @@ # Supported sites
|
||||||
- **Goshgay**
|
- **Goshgay**
|
||||||
- **GoToStage**
|
- **GoToStage**
|
||||||
- **GPUTechConf**
|
- **GPUTechConf**
|
||||||
|
- **Graspop**
|
||||||
- **Gronkh**
|
- **Gronkh**
|
||||||
- **gronkh:feed**
|
- **gronkh:feed**
|
||||||
- **gronkh:vods**
|
- **gronkh:vods**
|
||||||
|
@ -656,10 +655,11 @@ # Supported sites
|
||||||
- **Ketnet**
|
- **Ketnet**
|
||||||
- **khanacademy**
|
- **khanacademy**
|
||||||
- **khanacademy:unit**
|
- **khanacademy:unit**
|
||||||
- **Kick**
|
- **kick:clips**
|
||||||
|
- **kick:live**
|
||||||
|
- **kick:vod**
|
||||||
- **Kicker**
|
- **Kicker**
|
||||||
- **KickStarter**
|
- **KickStarter**
|
||||||
- **KickVOD**
|
|
||||||
- **kinja:embed**
|
- **kinja:embed**
|
||||||
- **KinoPoisk**
|
- **KinoPoisk**
|
||||||
- **Kommunetv**
|
- **Kommunetv**
|
||||||
|
@ -678,6 +678,8 @@ # Supported sites
|
||||||
- **la7.it**
|
- **la7.it**
|
||||||
- **la7.it:pod:episode**
|
- **la7.it:pod:episode**
|
||||||
- **la7.it:podcast**
|
- **la7.it:podcast**
|
||||||
|
- **laracasts**
|
||||||
|
- **laracasts:series**
|
||||||
- **LastFM**
|
- **LastFM**
|
||||||
- **LastFMPlaylist**
|
- **LastFMPlaylist**
|
||||||
- **LastFMUser**
|
- **LastFMUser**
|
||||||
|
@ -689,6 +691,7 @@ # Supported sites
|
||||||
- **Lcp**
|
- **Lcp**
|
||||||
- **LcpPlay**
|
- **LcpPlay**
|
||||||
- **Le**: 乐视网
|
- **Le**: 乐视网
|
||||||
|
- **LearningOnScreen**
|
||||||
- **Lecture2Go**: (**Currently broken**)
|
- **Lecture2Go**: (**Currently broken**)
|
||||||
- **Lecturio**: [*lecturio*](## "netrc machine")
|
- **Lecturio**: [*lecturio*](## "netrc machine")
|
||||||
- **LecturioCourse**: [*lecturio*](## "netrc machine")
|
- **LecturioCourse**: [*lecturio*](## "netrc machine")
|
||||||
|
@ -775,7 +778,12 @@ # Supported sites
|
||||||
- **MelonVOD**
|
- **MelonVOD**
|
||||||
- **Metacritic**
|
- **Metacritic**
|
||||||
- **mewatch**
|
- **mewatch**
|
||||||
|
- **MicrosoftBuild**
|
||||||
- **MicrosoftEmbed**
|
- **MicrosoftEmbed**
|
||||||
|
- **MicrosoftLearnEpisode**
|
||||||
|
- **MicrosoftLearnPlaylist**
|
||||||
|
- **MicrosoftLearnSession**
|
||||||
|
- **MicrosoftMedius**
|
||||||
- **microsoftstream**: Microsoft Stream
|
- **microsoftstream**: Microsoft Stream
|
||||||
- **mildom**: Record ongoing live by specific user in Mildom
|
- **mildom**: Record ongoing live by specific user in Mildom
|
||||||
- **mildom:clip**: Clip in Mildom
|
- **mildom:clip**: Clip in Mildom
|
||||||
|
@ -811,8 +819,6 @@ # Supported sites
|
||||||
- **MotherlessGroup**
|
- **MotherlessGroup**
|
||||||
- **MotherlessUploader**
|
- **MotherlessUploader**
|
||||||
- **Motorsport**: motorsport.com (**Currently broken**)
|
- **Motorsport**: motorsport.com (**Currently broken**)
|
||||||
- **MotorTrend**
|
|
||||||
- **MotorTrendOnDemand**
|
|
||||||
- **MovieFap**
|
- **MovieFap**
|
||||||
- **Moviepilot**
|
- **Moviepilot**
|
||||||
- **MoviewPlay**
|
- **MoviewPlay**
|
||||||
|
@ -830,7 +836,7 @@ # Supported sites
|
||||||
- **MTVUutisetArticle**: (**Currently broken**)
|
- **MTVUutisetArticle**: (**Currently broken**)
|
||||||
- **MuenchenTV**: münchen.tv (**Currently broken**)
|
- **MuenchenTV**: münchen.tv (**Currently broken**)
|
||||||
- **MujRozhlas**
|
- **MujRozhlas**
|
||||||
- **Murrtube**: (**Currently broken**)
|
- **Murrtube**
|
||||||
- **MurrtubeUser**: Murrtube user profile (**Currently broken**)
|
- **MurrtubeUser**: Murrtube user profile (**Currently broken**)
|
||||||
- **MuseAI**
|
- **MuseAI**
|
||||||
- **MuseScore**
|
- **MuseScore**
|
||||||
|
@ -838,8 +844,6 @@ # Supported sites
|
||||||
- **MusicdexArtist**
|
- **MusicdexArtist**
|
||||||
- **MusicdexPlaylist**
|
- **MusicdexPlaylist**
|
||||||
- **MusicdexSong**
|
- **MusicdexSong**
|
||||||
- **mva**: Microsoft Virtual Academy videos
|
|
||||||
- **mva:course**: Microsoft Virtual Academy courses
|
|
||||||
- **Mx3**
|
- **Mx3**
|
||||||
- **Mx3Neo**
|
- **Mx3Neo**
|
||||||
- **Mx3Volksmusik**
|
- **Mx3Volksmusik**
|
||||||
|
@ -1131,13 +1135,13 @@ # Supported sites
|
||||||
- **QingTing**
|
- **QingTing**
|
||||||
- **qqmusic**: QQ音乐
|
- **qqmusic**: QQ音乐
|
||||||
- **qqmusic:album**: QQ音乐 - 专辑
|
- **qqmusic:album**: QQ音乐 - 专辑
|
||||||
|
- **qqmusic:mv**: QQ音乐 - MV
|
||||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||||
- **QuantumTV**: [*quantumtv*](## "netrc machine")
|
- **QuantumTV**: [*quantumtv*](## "netrc machine")
|
||||||
- **QuantumTVLive**: [*quantumtv*](## "netrc machine")
|
- **QuantumTVLive**: [*quantumtv*](## "netrc machine")
|
||||||
- **QuantumTVRecordings**: [*quantumtv*](## "netrc machine")
|
- **QuantumTVRecordings**: [*quantumtv*](## "netrc machine")
|
||||||
- **Qub**
|
|
||||||
- **R7**: (**Currently broken**)
|
- **R7**: (**Currently broken**)
|
||||||
- **R7Article**: (**Currently broken**)
|
- **R7Article**: (**Currently broken**)
|
||||||
- **Radiko**
|
- **Radiko**
|
||||||
|
@ -1237,6 +1241,7 @@ # Supported sites
|
||||||
- **rtve.es:television**
|
- **rtve.es:television**
|
||||||
- **RTVS**
|
- **RTVS**
|
||||||
- **rtvslo.si**
|
- **rtvslo.si**
|
||||||
|
- **rtvslo.si:show**
|
||||||
- **RudoVideo**
|
- **RudoVideo**
|
||||||
- **Rule34Video**
|
- **Rule34Video**
|
||||||
- **Rumble**
|
- **Rumble**
|
||||||
|
@ -1360,6 +1365,7 @@ # Supported sites
|
||||||
- **SpreakerShowPage**
|
- **SpreakerShowPage**
|
||||||
- **SpringboardPlatform**
|
- **SpringboardPlatform**
|
||||||
- **Sprout**
|
- **Sprout**
|
||||||
|
- **SproutVideo**
|
||||||
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
|
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
|
||||||
- **SRGSSR**
|
- **SRGSSR**
|
||||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||||
|
@ -1494,8 +1500,8 @@ # Supported sites
|
||||||
- **Tube8**: (**Currently broken**)
|
- **Tube8**: (**Currently broken**)
|
||||||
- **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at
|
- **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at
|
||||||
- **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine")
|
- **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine")
|
||||||
- **TubiTv**: [*tubitv*](## "netrc machine")
|
- **tubitv**: [*tubitv*](## "netrc machine")
|
||||||
- **TubiTvShow**
|
- **tubitv:series**
|
||||||
- **Tumblr**: [*tumblr*](## "netrc machine")
|
- **Tumblr**: [*tumblr*](## "netrc machine")
|
||||||
- **TuneInPodcast**
|
- **TuneInPodcast**
|
||||||
- **TuneInPodcastEpisode**
|
- **TuneInPodcastEpisode**
|
||||||
|
@ -1512,9 +1518,9 @@ # Supported sites
|
||||||
- **tv5unis**
|
- **tv5unis**
|
||||||
- **tv5unis:video**
|
- **tv5unis:video**
|
||||||
- **tv8.it**
|
- **tv8.it**
|
||||||
- **TVA**
|
|
||||||
- **TVANouvelles**
|
- **TVANouvelles**
|
||||||
- **TVANouvellesArticle**
|
- **TVANouvellesArticle**
|
||||||
|
- **tvaplus**: TVA+
|
||||||
- **TVC**
|
- **TVC**
|
||||||
- **TVCArticle**
|
- **TVCArticle**
|
||||||
- **TVer**
|
- **TVer**
|
||||||
|
@ -1607,6 +1613,8 @@ # Supported sites
|
||||||
- **VidioPremier**: [*vidio*](## "netrc machine")
|
- **VidioPremier**: [*vidio*](## "netrc machine")
|
||||||
- **VidLii**
|
- **VidLii**
|
||||||
- **Vidly**
|
- **Vidly**
|
||||||
|
- **vids.io**
|
||||||
|
- **Vidyard**
|
||||||
- **viewlift**
|
- **viewlift**
|
||||||
- **viewlift:embed**
|
- **viewlift:embed**
|
||||||
- **Viidea**
|
- **Viidea**
|
||||||
|
@ -1654,6 +1662,8 @@ # Supported sites
|
||||||
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
|
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
|
||||||
- **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX
|
- **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX
|
||||||
- **VTM**: (**Currently broken**)
|
- **VTM**: (**Currently broken**)
|
||||||
|
- **VTV**
|
||||||
|
- **VTVGo**
|
||||||
- **VTXTV**: [*vtxtv*](## "netrc machine")
|
- **VTXTV**: [*vtxtv*](## "netrc machine")
|
||||||
- **VTXTVLive**: [*vtxtv*](## "netrc machine")
|
- **VTXTVLive**: [*vtxtv*](## "netrc machine")
|
||||||
- **VTXTVRecordings**: [*vtxtv*](## "netrc machine")
|
- **VTXTVRecordings**: [*vtxtv*](## "netrc machine")
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
@ -520,7 +521,33 @@ def test_format_filtering(self):
|
||||||
ydl.process_ie_result(info_dict)
|
ydl.process_ie_result(info_dict)
|
||||||
self.assertEqual(ydl.downloaded_info_dicts, [])
|
self.assertEqual(ydl.downloaded_info_dicts, [])
|
||||||
|
|
||||||
def test_default_format_spec(self):
|
@patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', False)
|
||||||
|
def test_default_format_spec_without_ffmpeg(self):
|
||||||
|
ydl = YDL({})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
ydl = YDL({'simulate': True})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
ydl = YDL({})
|
||||||
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
ydl = YDL({'simulate': True})
|
||||||
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
ydl = YDL({'outtmpl': '-'})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
ydl = YDL({})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||||
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
@patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', True)
|
||||||
|
@patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.can_merge', lambda _: True)
|
||||||
|
def test_default_format_spec_with_ffmpeg(self):
|
||||||
|
ydl = YDL({})
|
||||||
|
self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
|
||||||
|
|
||||||
ydl = YDL({'simulate': True})
|
ydl = YDL({'simulate': True})
|
||||||
self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
|
self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
|
||||||
|
|
||||||
|
@ -528,13 +555,13 @@ def test_default_format_spec(self):
|
||||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
ydl = YDL({'simulate': True})
|
ydl = YDL({'simulate': True})
|
||||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best')
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
ydl = YDL({'outtmpl': '-'})
|
ydl = YDL({'outtmpl': '-'})
|
||||||
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
ydl = YDL({})
|
ydl = YDL({})
|
||||||
self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best')
|
self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
|
||||||
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,7 @@ def test_get_desktop_environment(self):
|
||||||
({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
|
({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
|
||||||
({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
|
({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
|
||||||
({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
|
({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
|
||||||
|
({'XDG_CURRENT_DESKTOP': 'ubuntu:GNOME'}, _LinuxDesktopEnvironment.GNOME),
|
||||||
|
|
||||||
({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
|
({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
|
||||||
({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5),
|
({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5),
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
gettestcases,
|
gettestcases,
|
||||||
getwebpagetestcases,
|
getwebpagetestcases,
|
||||||
is_download_test,
|
is_download_test,
|
||||||
report_warning,
|
|
||||||
try_rm,
|
try_rm,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -178,8 +177,7 @@ def try_rm_tcs_files(tcs=None):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
if try_num == RETRIES:
|
if try_num == RETRIES:
|
||||||
report_warning(f'{tname} failed due to network errors, skipping...')
|
raise
|
||||||
return
|
|
||||||
|
|
||||||
print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
|
print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
|
||||||
|
|
||||||
|
|
|
@ -92,6 +92,7 @@ def test_operators(self):
|
||||||
self._test('function f(){return 0 && 1 || 2;}', 2)
|
self._test('function f(){return 0 && 1 || 2;}', 2)
|
||||||
self._test('function f(){return 0 ?? 42;}', 0)
|
self._test('function f(){return 0 ?? 42;}', 0)
|
||||||
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
self._test('function f(){return "life, the universe and everything" < 42;}', False)
|
||||||
|
self._test('function f(){return 0 - 7 * - 6;}', 42)
|
||||||
|
|
||||||
def test_array_access(self):
|
def test_array_access(self):
|
||||||
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
|
||||||
|
@ -375,6 +376,33 @@ def test_packed(self):
|
||||||
jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
|
jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
|
||||||
self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
|
self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
|
||||||
|
|
||||||
|
def test_join(self):
|
||||||
|
test_input = list('test')
|
||||||
|
tests = [
|
||||||
|
'function f(a, b){return a.join(b)}',
|
||||||
|
'function f(a, b){return Array.prototype.join.call(a, b)}',
|
||||||
|
'function f(a, b){return Array.prototype.join.apply(a, [b])}',
|
||||||
|
]
|
||||||
|
for test in tests:
|
||||||
|
jsi = JSInterpreter(test)
|
||||||
|
self._test(jsi, 'test', args=[test_input, ''])
|
||||||
|
self._test(jsi, 't-e-s-t', args=[test_input, '-'])
|
||||||
|
self._test(jsi, '', args=[[], '-'])
|
||||||
|
|
||||||
|
def test_split(self):
|
||||||
|
test_result = list('test')
|
||||||
|
tests = [
|
||||||
|
'function f(a, b){return a.split(b)}',
|
||||||
|
'function f(a, b){return String.prototype.split.call(a, b)}',
|
||||||
|
'function f(a, b){return String.prototype.split.apply(a, [b])}',
|
||||||
|
]
|
||||||
|
for test in tests:
|
||||||
|
jsi = JSInterpreter(test)
|
||||||
|
self._test(jsi, test_result, args=['test', ''])
|
||||||
|
self._test(jsi, test_result, args=['t-e-s-t', '-'])
|
||||||
|
self._test(jsi, [''], args=['', '-'])
|
||||||
|
self._test(jsi, [], args=['', ''])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -265,6 +265,11 @@ def do_GET(self):
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(payload)
|
self.wfile.write(payload)
|
||||||
self.finish()
|
self.finish()
|
||||||
|
elif self.path == '/get_cookie':
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Set-Cookie', 'test=ytdlp; path=/')
|
||||||
|
self.end_headers()
|
||||||
|
self.finish()
|
||||||
else:
|
else:
|
||||||
self._status(404)
|
self._status(404)
|
||||||
|
|
||||||
|
@ -338,6 +343,52 @@ def test_ssl_error(self, handler):
|
||||||
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
|
@pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI')
|
||||||
|
def test_legacy_ssl_extension(self, handler):
|
||||||
|
# HTTPS server with old ciphers
|
||||||
|
# XXX: is there a better way to test this than to create a new server?
|
||||||
|
https_httpd = http.server.ThreadingHTTPServer(
|
||||||
|
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||||
|
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
|
||||||
|
sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
|
||||||
|
sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
|
||||||
|
sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
|
||||||
|
https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
|
||||||
|
https_port = http_server_port(https_httpd)
|
||||||
|
https_server_thread = threading.Thread(target=https_httpd.serve_forever)
|
||||||
|
https_server_thread.daemon = True
|
||||||
|
https_server_thread.start()
|
||||||
|
|
||||||
|
with handler(verify=False) as rh:
|
||||||
|
res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers', extensions={'legacy_ssl': True}))
|
||||||
|
assert res.status == 200
|
||||||
|
res.close()
|
||||||
|
|
||||||
|
# Ensure only applies to request extension
|
||||||
|
with pytest.raises(SSLError):
|
||||||
|
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||||
|
|
||||||
|
@pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI')
|
||||||
|
def test_legacy_ssl_support(self, handler):
|
||||||
|
# HTTPS server with old ciphers
|
||||||
|
# XXX: is there a better way to test this than to create a new server?
|
||||||
|
https_httpd = http.server.ThreadingHTTPServer(
|
||||||
|
('127.0.0.1', 0), HTTPTestRequestHandler)
|
||||||
|
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
|
||||||
|
sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
|
||||||
|
sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
|
||||||
|
sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
|
||||||
|
https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True)
|
||||||
|
https_port = http_server_port(https_httpd)
|
||||||
|
https_server_thread = threading.Thread(target=https_httpd.serve_forever)
|
||||||
|
https_server_thread.daemon = True
|
||||||
|
https_server_thread.start()
|
||||||
|
|
||||||
|
with handler(verify=False, legacy_ssl_support=True) as rh:
|
||||||
|
res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||||
|
assert res.status == 200
|
||||||
|
res.close()
|
||||||
|
|
||||||
def test_percent_encode(self, handler):
|
def test_percent_encode(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
# Unicode characters should be encoded with uppercase percent-encoding
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
|
@ -490,6 +541,24 @@ def test_cookies(self, handler):
|
||||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
||||||
assert b'cookie: test=ytdlp' in data.lower()
|
assert b'cookie: test=ytdlp' in data.lower()
|
||||||
|
|
||||||
|
def test_cookie_sync_only_cookiejar(self, handler):
|
||||||
|
# Ensure that cookies are ONLY being handled by the cookiejar
|
||||||
|
with handler() as rh:
|
||||||
|
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie', extensions={'cookiejar': YoutubeDLCookieJar()}))
|
||||||
|
data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': YoutubeDLCookieJar()})).read()
|
||||||
|
assert b'cookie: test=ytdlp' not in data.lower()
|
||||||
|
|
||||||
|
def test_cookie_sync_delete_cookie(self, handler):
|
||||||
|
# Ensure that cookies are ONLY being handled by the cookiejar
|
||||||
|
cookiejar = YoutubeDLCookieJar()
|
||||||
|
with handler(cookiejar=cookiejar) as rh:
|
||||||
|
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie'))
|
||||||
|
data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
|
||||||
|
assert b'cookie: test=ytdlp' in data.lower()
|
||||||
|
cookiejar.clear_session_cookies()
|
||||||
|
data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
|
||||||
|
assert b'cookie: test=ytdlp' not in data.lower()
|
||||||
|
|
||||||
def test_headers(self, handler):
|
def test_headers(self, handler):
|
||||||
|
|
||||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||||
|
@ -914,7 +983,6 @@ def mock_close(*args, **kwargs):
|
||||||
class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
|
class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
|
||||||
|
|
||||||
@pytest.mark.parametrize('params,extensions', [
|
@pytest.mark.parametrize('params,extensions', [
|
||||||
({}, {'impersonate': ImpersonateTarget('chrome')}),
|
|
||||||
({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
|
({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
|
||||||
({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
|
({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
|
||||||
])
|
])
|
||||||
|
@ -1200,6 +1268,9 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
({'timeout': 1}, False),
|
({'timeout': 1}, False),
|
||||||
({'timeout': 'notatimeout'}, AssertionError),
|
({'timeout': 'notatimeout'}, AssertionError),
|
||||||
({'unsupported': 'value'}, UnsupportedRequest),
|
({'unsupported': 'value'}, UnsupportedRequest),
|
||||||
|
({'legacy_ssl': False}, False),
|
||||||
|
({'legacy_ssl': True}, False),
|
||||||
|
({'legacy_ssl': 'notabool'}, AssertionError),
|
||||||
]),
|
]),
|
||||||
('Requests', 'http', [
|
('Requests', 'http', [
|
||||||
({'cookiejar': 'notacookiejar'}, AssertionError),
|
({'cookiejar': 'notacookiejar'}, AssertionError),
|
||||||
|
@ -1207,6 +1278,9 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
({'timeout': 1}, False),
|
({'timeout': 1}, False),
|
||||||
({'timeout': 'notatimeout'}, AssertionError),
|
({'timeout': 'notatimeout'}, AssertionError),
|
||||||
({'unsupported': 'value'}, UnsupportedRequest),
|
({'unsupported': 'value'}, UnsupportedRequest),
|
||||||
|
({'legacy_ssl': False}, False),
|
||||||
|
({'legacy_ssl': True}, False),
|
||||||
|
({'legacy_ssl': 'notabool'}, AssertionError),
|
||||||
]),
|
]),
|
||||||
('CurlCFFI', 'http', [
|
('CurlCFFI', 'http', [
|
||||||
({'cookiejar': 'notacookiejar'}, AssertionError),
|
({'cookiejar': 'notacookiejar'}, AssertionError),
|
||||||
|
@ -1220,6 +1294,9 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
|
({'impersonate': ImpersonateTarget(None, None, None, None)}, False),
|
||||||
({'impersonate': ImpersonateTarget()}, False),
|
({'impersonate': ImpersonateTarget()}, False),
|
||||||
({'impersonate': 'chrome'}, AssertionError),
|
({'impersonate': 'chrome'}, AssertionError),
|
||||||
|
({'legacy_ssl': False}, False),
|
||||||
|
({'legacy_ssl': True}, False),
|
||||||
|
({'legacy_ssl': 'notabool'}, AssertionError),
|
||||||
]),
|
]),
|
||||||
(NoCheckRH, 'http', [
|
(NoCheckRH, 'http', [
|
||||||
({'cookiejar': 'notacookiejar'}, False),
|
({'cookiejar': 'notacookiejar'}, False),
|
||||||
|
@ -1228,6 +1305,9 @@ class HTTPSupportedRH(ValidationRH):
|
||||||
('Websockets', 'ws', [
|
('Websockets', 'ws', [
|
||||||
({'cookiejar': YoutubeDLCookieJar()}, False),
|
({'cookiejar': YoutubeDLCookieJar()}, False),
|
||||||
({'timeout': 2}, False),
|
({'timeout': 2}, False),
|
||||||
|
({'legacy_ssl': False}, False),
|
||||||
|
({'legacy_ssl': True}, False),
|
||||||
|
({'legacy_ssl': 'notabool'}, AssertionError),
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -130,6 +130,7 @@
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
from yt_dlp.utils._utils import _UnsafeExtensionError
|
||||||
from yt_dlp.utils.networking import (
|
from yt_dlp.utils.networking import (
|
||||||
HTTPHeaderDict,
|
HTTPHeaderDict,
|
||||||
escape_rfc3986,
|
escape_rfc3986,
|
||||||
|
@ -281,6 +282,13 @@ def env(var):
|
||||||
finally:
|
finally:
|
||||||
os.environ['HOME'] = old_home or ''
|
os.environ['HOME'] = old_home or ''
|
||||||
|
|
||||||
|
_uncommon_extensions = [
|
||||||
|
('exe', 'abc.exe.ext'),
|
||||||
|
('de', 'abc.de.ext'),
|
||||||
|
('../.mp4', None),
|
||||||
|
('..\\.mp4', None),
|
||||||
|
]
|
||||||
|
|
||||||
def test_prepend_extension(self):
|
def test_prepend_extension(self):
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
|
||||||
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
|
||||||
|
@ -289,6 +297,19 @@ def test_prepend_extension(self):
|
||||||
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
|
||||||
|
|
||||||
|
# Test uncommon extensions
|
||||||
|
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
|
||||||
|
for ext, result in self._uncommon_extensions:
|
||||||
|
with self.assertRaises(_UnsafeExtensionError):
|
||||||
|
prepend_extension('abc', ext)
|
||||||
|
if result:
|
||||||
|
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
|
||||||
|
else:
|
||||||
|
with self.assertRaises(_UnsafeExtensionError):
|
||||||
|
prepend_extension('abc.ext', ext, 'ext')
|
||||||
|
with self.assertRaises(_UnsafeExtensionError):
|
||||||
|
prepend_extension('abc.unexpected_ext', ext, 'ext')
|
||||||
|
|
||||||
def test_replace_extension(self):
|
def test_replace_extension(self):
|
||||||
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
|
||||||
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
|
||||||
|
@ -297,6 +318,16 @@ def test_replace_extension(self):
|
||||||
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
|
||||||
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
|
||||||
|
|
||||||
|
# Test uncommon extensions
|
||||||
|
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
|
||||||
|
for ext, _ in self._uncommon_extensions:
|
||||||
|
with self.assertRaises(_UnsafeExtensionError):
|
||||||
|
replace_extension('abc', ext)
|
||||||
|
with self.assertRaises(_UnsafeExtensionError):
|
||||||
|
replace_extension('abc.ext', ext, 'ext')
|
||||||
|
with self.assertRaises(_UnsafeExtensionError):
|
||||||
|
replace_extension('abc.unexpected_ext', ext, 'ext')
|
||||||
|
|
||||||
def test_subtitles_filename(self):
|
def test_subtitles_filename(self):
|
||||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
|
||||||
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
|
||||||
|
@ -413,6 +444,8 @@ def test_unified_timestamps(self):
|
||||||
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
|
||||||
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
|
||||||
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
|
||||||
|
self.assertEqual(unified_timestamp('Sunday, 26 Nov 2006, 19:00'), 1164567600)
|
||||||
|
self.assertEqual(unified_timestamp('wed, aug 16, 2008, 12:00pm'), 1218931200)
|
||||||
|
|
||||||
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
|
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
|
||||||
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
|
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
|
||||||
|
@ -898,6 +931,11 @@ def test_parse_codecs(self):
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'dynamic_range': 'DV',
|
'dynamic_range': 'DV',
|
||||||
})
|
})
|
||||||
|
self.assertEqual(parse_codecs('fLaC'), {
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': 'flac',
|
||||||
|
'dynamic_range': None,
|
||||||
|
})
|
||||||
self.assertEqual(parse_codecs('theora, vorbis'), {
|
self.assertEqual(parse_codecs('theora, vorbis'), {
|
||||||
'vcodec': 'theora',
|
'vcodec': 'theora',
|
||||||
'acodec': 'vorbis',
|
'acodec': 'vorbis',
|
||||||
|
|
|
@ -61,6 +61,10 @@ def process_request(self, request):
|
||||||
return websockets.http11.Response(
|
return websockets.http11.Response(
|
||||||
status.value, status.phrase, websockets.datastructures.Headers([('Location', '/')]), b'')
|
status.value, status.phrase, websockets.datastructures.Headers([('Location', '/')]), b'')
|
||||||
return self.protocol.reject(status.value, status.phrase)
|
return self.protocol.reject(status.value, status.phrase)
|
||||||
|
elif request.path.startswith('/get_cookie'):
|
||||||
|
response = self.protocol.accept(request)
|
||||||
|
response.headers['Set-Cookie'] = 'test=ytdlp'
|
||||||
|
return response
|
||||||
return self.protocol.accept(request)
|
return self.protocol.accept(request)
|
||||||
|
|
||||||
|
|
||||||
|
@ -102,6 +106,15 @@ def create_mtls_wss_websocket_server():
|
||||||
return create_websocket_server(ssl_context=sslctx)
|
return create_websocket_server(ssl_context=sslctx)
|
||||||
|
|
||||||
|
|
||||||
|
def create_legacy_wss_websocket_server():
|
||||||
|
certfn = os.path.join(TEST_DIR, 'testcert.pem')
|
||||||
|
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
|
||||||
|
sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
|
||||||
|
sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
|
||||||
|
sslctx.load_cert_chain(certfn, None)
|
||||||
|
return create_websocket_server(ssl_context=sslctx)
|
||||||
|
|
||||||
|
|
||||||
def ws_validate_and_send(rh, req):
|
def ws_validate_and_send(rh, req):
|
||||||
rh.validate(req)
|
rh.validate(req)
|
||||||
max_tries = 3
|
max_tries = 3
|
||||||
|
@ -132,6 +145,9 @@ def setup_class(cls):
|
||||||
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
||||||
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
||||||
|
|
||||||
|
cls.legacy_wss_thread, cls.legacy_wss_port = create_legacy_wss_websocket_server()
|
||||||
|
cls.legacy_wss_host = f'wss://127.0.0.1:{cls.legacy_wss_port}'
|
||||||
|
|
||||||
def test_basic_websockets(self, handler):
|
def test_basic_websockets(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
@ -166,6 +182,22 @@ def test_ssl_error(self, handler):
|
||||||
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
|
def test_legacy_ssl_extension(self, handler):
|
||||||
|
with handler(verify=False) as rh:
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.legacy_wss_host, extensions={'legacy_ssl': True}))
|
||||||
|
assert ws.status == 101
|
||||||
|
ws.close()
|
||||||
|
|
||||||
|
# Ensure only applies to request extension
|
||||||
|
with pytest.raises(SSLError):
|
||||||
|
ws_validate_and_send(rh, Request(self.legacy_wss_host))
|
||||||
|
|
||||||
|
def test_legacy_ssl_support(self, handler):
|
||||||
|
with handler(verify=False, legacy_ssl_support=True) as rh:
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.legacy_wss_host))
|
||||||
|
assert ws.status == 101
|
||||||
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('path,expected', [
|
@pytest.mark.parametrize('path,expected', [
|
||||||
# Unicode characters should be encoded with uppercase percent-encoding
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
('/中文', '/%E4%B8%AD%E6%96%87'),
|
('/中文', '/%E4%B8%AD%E6%96%87'),
|
||||||
|
@ -248,6 +280,32 @@ def test_cookies(self, handler):
|
||||||
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
|
@pytest.mark.skip_handler('Websockets', 'Set-Cookie not supported by websockets')
|
||||||
|
def test_cookie_sync_only_cookiejar(self, handler):
|
||||||
|
# Ensure that cookies are ONLY being handled by the cookiejar
|
||||||
|
with handler() as rh:
|
||||||
|
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/get_cookie', extensions={'cookiejar': YoutubeDLCookieJar()}))
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': YoutubeDLCookieJar()}))
|
||||||
|
ws.send('headers')
|
||||||
|
assert 'cookie' not in json.loads(ws.recv())
|
||||||
|
ws.close()
|
||||||
|
|
||||||
|
@pytest.mark.skip_handler('Websockets', 'Set-Cookie not supported by websockets')
|
||||||
|
def test_cookie_sync_delete_cookie(self, handler):
|
||||||
|
# Ensure that cookies are ONLY being handled by the cookiejar
|
||||||
|
cookiejar = YoutubeDLCookieJar()
|
||||||
|
with handler(verbose=True, cookiejar=cookiejar) as rh:
|
||||||
|
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/get_cookie'))
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
ws.send('headers')
|
||||||
|
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
||||||
|
ws.close()
|
||||||
|
cookiejar.clear_session_cookies()
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
ws.send('headers')
|
||||||
|
assert 'cookie' not in json.loads(ws.recv())
|
||||||
|
ws.close()
|
||||||
|
|
||||||
def test_source_address(self, handler):
|
def test_source_address(self, handler):
|
||||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
verify_address_availability(source_address)
|
verify_address_availability(source_address)
|
||||||
|
|
|
@ -163,6 +163,22 @@
|
||||||
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
|
||||||
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
|
||||||
|
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
|
||||||
|
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
|
||||||
|
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import fileinput
|
||||||
|
import functools
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import io
|
import io
|
||||||
import itertools
|
import itertools
|
||||||
|
@ -24,7 +25,7 @@
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .compat import functools, urllib # isort: split
|
from .compat import urllib # isort: split
|
||||||
from .compat import compat_os_name, urllib_req_to_req
|
from .compat import compat_os_name, urllib_req_to_req
|
||||||
from .cookies import LenientSimpleCookie, load_cookies
|
from .cookies import LenientSimpleCookie, load_cookies
|
||||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||||
|
@ -158,7 +159,7 @@
|
||||||
write_json_file,
|
write_json_file,
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
from .utils._utils import _YDLLogger
|
from .utils._utils import _UnsafeExtensionError, _YDLLogger
|
||||||
from .utils.networking import (
|
from .utils.networking import (
|
||||||
HTTPHeaderDict,
|
HTTPHeaderDict,
|
||||||
clean_headers,
|
clean_headers,
|
||||||
|
@ -171,6 +172,20 @@
|
||||||
import ctypes
|
import ctypes
|
||||||
|
|
||||||
|
|
||||||
|
def _catch_unsafe_extension_error(func):
|
||||||
|
@functools.wraps(func)
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
|
try:
|
||||||
|
return func(self, *args, **kwargs)
|
||||||
|
except _UnsafeExtensionError as error:
|
||||||
|
self.report_error(
|
||||||
|
f'The extracted extension ({error.extension!r}) is unusual '
|
||||||
|
'and will be skipped for safety reasons. '
|
||||||
|
f'If you believe this is an error{bug_reports_message(",")}')
|
||||||
|
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
class YoutubeDL:
|
class YoutubeDL:
|
||||||
"""YoutubeDL class.
|
"""YoutubeDL class.
|
||||||
|
|
||||||
|
@ -437,7 +452,8 @@ class YoutubeDL:
|
||||||
Can also just be a single color policy,
|
Can also just be a single color policy,
|
||||||
in which case it applies to all outputs.
|
in which case it applies to all outputs.
|
||||||
Valid stream names are 'stdout' and 'stderr'.
|
Valid stream names are 'stdout' and 'stderr'.
|
||||||
Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
|
Valid color policies are one of 'always', 'auto',
|
||||||
|
'no_color', 'never', 'auto-tty' or 'no_color-tty'.
|
||||||
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
|
||||||
HTTP header
|
HTTP header
|
||||||
geo_bypass_country:
|
geo_bypass_country:
|
||||||
|
@ -453,8 +469,9 @@ class YoutubeDL:
|
||||||
Set the value to 'native' to use the native downloader
|
Set the value to 'native' to use the native downloader
|
||||||
compat_opts: Compatibility options. See "Differences in default behavior".
|
compat_opts: Compatibility options. See "Differences in default behavior".
|
||||||
The following options do not work when used through the API:
|
The following options do not work when used through the API:
|
||||||
filename, abort-on-error, multistreams, no-live-chat, format-sort
|
filename, abort-on-error, multistreams, no-live-chat,
|
||||||
no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
|
format-sort, no-clean-infojson, no-playlist-metafiles,
|
||||||
|
no-keep-subs, no-attach-info-json, allow-unsafe-ext.
|
||||||
Refer __init__.py for their implementation
|
Refer __init__.py for their implementation
|
||||||
progress_template: Dictionary of templates for progress outputs.
|
progress_template: Dictionary of templates for progress outputs.
|
||||||
Allowed keys are 'download', 'postprocess',
|
Allowed keys are 'download', 'postprocess',
|
||||||
|
@ -643,12 +660,15 @@ def __init__(self, params=None, auto_init=True):
|
||||||
self.params['color'] = 'no_color'
|
self.params['color'] = 'no_color'
|
||||||
|
|
||||||
term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
|
term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
|
||||||
no_color = bool(os.getenv('NO_COLOR'))
|
base_no_color = bool(os.getenv('NO_COLOR'))
|
||||||
|
|
||||||
def process_color_policy(stream):
|
def process_color_policy(stream):
|
||||||
stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
|
stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
|
||||||
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
|
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}, any)) or 'auto'
|
||||||
if policy in ('auto', None):
|
if policy in ('auto', 'auto-tty', 'no_color-tty'):
|
||||||
|
no_color = base_no_color
|
||||||
|
if policy.endswith('tty'):
|
||||||
|
no_color = policy.startswith('no_color')
|
||||||
if term_allow_color and supports_terminal_sequences(stream):
|
if term_allow_color and supports_terminal_sequences(stream):
|
||||||
return 'no_color' if no_color else True
|
return 'no_color' if no_color else True
|
||||||
return False
|
return False
|
||||||
|
@ -1399,6 +1419,7 @@ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
|
||||||
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
|
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
|
||||||
return self.escape_outtmpl(outtmpl) % info_dict
|
return self.escape_outtmpl(outtmpl) % info_dict
|
||||||
|
|
||||||
|
@_catch_unsafe_extension_error
|
||||||
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
|
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
|
||||||
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
|
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
|
||||||
if outtmpl is None:
|
if outtmpl is None:
|
||||||
|
@ -1926,6 +1947,8 @@ def _playlist_infodict(ie_result, strict=False, **kwargs):
|
||||||
'playlist_title': ie_result.get('title'),
|
'playlist_title': ie_result.get('title'),
|
||||||
'playlist_uploader': ie_result.get('uploader'),
|
'playlist_uploader': ie_result.get('uploader'),
|
||||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||||
|
'playlist_channel': ie_result.get('channel'),
|
||||||
|
'playlist_channel_id': ie_result.get('channel_id'),
|
||||||
**kwargs,
|
**kwargs,
|
||||||
}
|
}
|
||||||
if strict:
|
if strict:
|
||||||
|
@ -2171,9 +2194,8 @@ def _select_formats(self, formats, selector):
|
||||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||||
}))
|
}))
|
||||||
|
|
||||||
def _default_format_spec(self, info_dict, download=True):
|
def _default_format_spec(self, info_dict):
|
||||||
download = download and not self.params.get('simulate')
|
prefer_best = (
|
||||||
prefer_best = download and (
|
|
||||||
self.params['outtmpl']['default'] == '-'
|
self.params['outtmpl']['default'] == '-'
|
||||||
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||||
|
|
||||||
|
@ -2181,7 +2203,7 @@ def can_merge():
|
||||||
merger = FFmpegMergerPP(self)
|
merger = FFmpegMergerPP(self)
|
||||||
return merger.available and merger.can_merge()
|
return merger.available and merger.can_merge()
|
||||||
|
|
||||||
if not prefer_best and download and not can_merge():
|
if not prefer_best and not can_merge():
|
||||||
prefer_best = True
|
prefer_best = True
|
||||||
formats = self._get_formats(info_dict)
|
formats = self._get_formats(info_dict)
|
||||||
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
||||||
|
@ -2940,7 +2962,7 @@ def is_wellformed(f):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if format_selector is None:
|
if format_selector is None:
|
||||||
req_format = self._default_format_spec(info_dict, download=download)
|
req_format = self._default_format_spec(info_dict)
|
||||||
self.write_debug(f'Default format spec: {req_format}')
|
self.write_debug(f'Default format spec: {req_format}')
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
|
|
||||||
|
@ -3150,11 +3172,12 @@ def dl(self, name, info, subtitle=False, test=False):
|
||||||
|
|
||||||
if test:
|
if test:
|
||||||
verbose = self.params.get('verbose')
|
verbose = self.params.get('verbose')
|
||||||
|
quiet = self.params.get('quiet') or not verbose
|
||||||
params = {
|
params = {
|
||||||
'test': True,
|
'test': True,
|
||||||
'quiet': self.params.get('quiet') or not verbose,
|
'quiet': quiet,
|
||||||
'verbose': verbose,
|
'verbose': verbose,
|
||||||
'noprogress': not verbose,
|
'noprogress': quiet,
|
||||||
'nopart': True,
|
'nopart': True,
|
||||||
'skip_unavailable_fragments': False,
|
'skip_unavailable_fragments': False,
|
||||||
'keep_fragments': False,
|
'keep_fragments': False,
|
||||||
|
@ -3189,6 +3212,7 @@ def existing_file(self, filepaths, *, default_overwrite=True):
|
||||||
os.remove(file)
|
os.remove(file)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@_catch_unsafe_extension_error
|
||||||
def process_info(self, info_dict):
|
def process_info(self, info_dict):
|
||||||
"""Process a single resolved IE result. (Modifies it in-place)"""
|
"""Process a single resolved IE result. (Modifies it in-place)"""
|
||||||
|
|
||||||
|
|
|
@ -64,6 +64,7 @@
|
||||||
write_string,
|
write_string,
|
||||||
)
|
)
|
||||||
from .utils.networking import std_headers
|
from .utils.networking import std_headers
|
||||||
|
from .utils._utils import _UnsafeExtensionError
|
||||||
from .YoutubeDL import YoutubeDL
|
from .YoutubeDL import YoutubeDL
|
||||||
|
|
||||||
_IN_CLI = False
|
_IN_CLI = False
|
||||||
|
@ -467,7 +468,7 @@ def metadataparser_actions(f):
|
||||||
default_downloader = ed.get_basename()
|
default_downloader = ed.get_basename()
|
||||||
|
|
||||||
for policy in opts.color.values():
|
for policy in opts.color.values():
|
||||||
if policy not in ('always', 'auto', 'no_color', 'never'):
|
if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'):
|
||||||
raise ValueError(f'"{policy}" is not a valid color policy')
|
raise ValueError(f'"{policy}" is not a valid color policy')
|
||||||
|
|
||||||
warnings, deprecation_warnings = [], []
|
warnings, deprecation_warnings = [], []
|
||||||
|
@ -593,6 +594,13 @@ def report_deprecation(val, old, new=None):
|
||||||
if opts.ap_username is not None and opts.ap_password is None:
|
if opts.ap_username is not None and opts.ap_password is None:
|
||||||
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
|
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
|
||||||
|
|
||||||
|
# compat option changes global state destructively; only allow from cli
|
||||||
|
if 'allow-unsafe-ext' in opts.compat_opts:
|
||||||
|
warnings.append(
|
||||||
|
'Using allow-unsafe-ext opens you up to potential attacks. '
|
||||||
|
'Use with great care!')
|
||||||
|
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
|
||||||
|
|
||||||
return warnings, deprecation_warnings
|
return warnings, deprecation_warnings
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,22 @@
|
||||||
tests = {
|
|
||||||
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
|
|
||||||
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
|
|
||||||
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
|
|
||||||
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def what(file=None, h=None):
|
def what(file=None, h=None):
|
||||||
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
|
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
|
||||||
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
|
Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
|
||||||
|
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
|
||||||
"""
|
"""
|
||||||
if h is None:
|
if h is None:
|
||||||
with open(file, 'rb') as f:
|
with open(file, 'rb') as f:
|
||||||
h = f.read(12)
|
h = f.read(12)
|
||||||
return next((type_ for type_, test in tests.items() if test(h)), None)
|
|
||||||
|
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
|
||||||
|
return 'webp'
|
||||||
|
|
||||||
|
if h.startswith(b'\x89PNG'):
|
||||||
|
return 'png'
|
||||||
|
|
||||||
|
if h.startswith(b'\xFF\xD8\xFF'):
|
||||||
|
return 'jpeg'
|
||||||
|
|
||||||
|
if h.startswith(b'GIF'):
|
||||||
|
return 'gif'
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
import functools
|
||||||
import glob
|
import glob
|
||||||
|
import hashlib
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import http.cookies
|
import http.cookies
|
||||||
import io
|
import io
|
||||||
|
@ -17,14 +19,12 @@
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from hashlib import pbkdf2_hmac
|
|
||||||
|
|
||||||
from .aes import (
|
from .aes import (
|
||||||
aes_cbc_decrypt_bytes,
|
aes_cbc_decrypt_bytes,
|
||||||
aes_gcm_decrypt_and_verify_bytes,
|
aes_gcm_decrypt_and_verify_bytes,
|
||||||
unpad_pkcs7,
|
unpad_pkcs7,
|
||||||
)
|
)
|
||||||
from .compat import functools # isort: split
|
|
||||||
from .compat import compat_os_name
|
from .compat import compat_os_name
|
||||||
from .dependencies import (
|
from .dependencies import (
|
||||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||||
|
@ -740,20 +740,19 @@ def _get_linux_desktop_environment(env, logger):
|
||||||
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
||||||
desktop_session = env.get('DESKTOP_SESSION', None)
|
desktop_session = env.get('DESKTOP_SESSION', None)
|
||||||
if xdg_current_desktop is not None:
|
if xdg_current_desktop is not None:
|
||||||
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
|
for part in map(str.strip, xdg_current_desktop.split(':')):
|
||||||
|
if part == 'Unity':
|
||||||
if xdg_current_desktop == 'Unity':
|
|
||||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||||
return _LinuxDesktopEnvironment.GNOME
|
return _LinuxDesktopEnvironment.GNOME
|
||||||
else:
|
else:
|
||||||
return _LinuxDesktopEnvironment.UNITY
|
return _LinuxDesktopEnvironment.UNITY
|
||||||
elif xdg_current_desktop == 'Deepin':
|
elif part == 'Deepin':
|
||||||
return _LinuxDesktopEnvironment.DEEPIN
|
return _LinuxDesktopEnvironment.DEEPIN
|
||||||
elif xdg_current_desktop == 'GNOME':
|
elif part == 'GNOME':
|
||||||
return _LinuxDesktopEnvironment.GNOME
|
return _LinuxDesktopEnvironment.GNOME
|
||||||
elif xdg_current_desktop == 'X-Cinnamon':
|
elif part == 'X-Cinnamon':
|
||||||
return _LinuxDesktopEnvironment.CINNAMON
|
return _LinuxDesktopEnvironment.CINNAMON
|
||||||
elif xdg_current_desktop == 'KDE':
|
elif part == 'KDE':
|
||||||
kde_version = env.get('KDE_SESSION_VERSION', None)
|
kde_version = env.get('KDE_SESSION_VERSION', None)
|
||||||
if kde_version == '5':
|
if kde_version == '5':
|
||||||
return _LinuxDesktopEnvironment.KDE5
|
return _LinuxDesktopEnvironment.KDE5
|
||||||
|
@ -764,15 +763,14 @@ def _get_linux_desktop_environment(env, logger):
|
||||||
else:
|
else:
|
||||||
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
|
||||||
return _LinuxDesktopEnvironment.KDE4
|
return _LinuxDesktopEnvironment.KDE4
|
||||||
elif xdg_current_desktop == 'Pantheon':
|
elif part == 'Pantheon':
|
||||||
return _LinuxDesktopEnvironment.PANTHEON
|
return _LinuxDesktopEnvironment.PANTHEON
|
||||||
elif xdg_current_desktop == 'XFCE':
|
elif part == 'XFCE':
|
||||||
return _LinuxDesktopEnvironment.XFCE
|
return _LinuxDesktopEnvironment.XFCE
|
||||||
elif xdg_current_desktop == 'UKUI':
|
elif part == 'UKUI':
|
||||||
return _LinuxDesktopEnvironment.UKUI
|
return _LinuxDesktopEnvironment.UKUI
|
||||||
elif xdg_current_desktop == 'LXQt':
|
elif part == 'LXQt':
|
||||||
return _LinuxDesktopEnvironment.LXQT
|
return _LinuxDesktopEnvironment.LXQT
|
||||||
else:
|
|
||||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||||
|
|
||||||
elif desktop_session is not None:
|
elif desktop_session is not None:
|
||||||
|
@ -1001,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger):
|
||||||
|
|
||||||
|
|
||||||
def pbkdf2_sha1(password, salt, iterations, key_length):
|
def pbkdf2_sha1(password, salt, iterations, key_length):
|
||||||
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
|
||||||
|
|
||||||
|
|
||||||
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import enum
|
import enum
|
||||||
|
import functools
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -9,7 +10,6 @@
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .fragment import FragmentFD
|
from .fragment import FragmentFD
|
||||||
from ..compat import functools
|
|
||||||
from ..networking import Request
|
from ..networking import Request
|
||||||
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
|
|
@ -76,6 +76,7 @@
|
||||||
)
|
)
|
||||||
from .aeonco import AeonCoIE
|
from .aeonco import AeonCoIE
|
||||||
from .afreecatv import (
|
from .afreecatv import (
|
||||||
|
AfreecaTVCatchStoryIE,
|
||||||
AfreecaTVIE,
|
AfreecaTVIE,
|
||||||
AfreecaTVLiveIE,
|
AfreecaTVLiveIE,
|
||||||
AfreecaTVUserIE,
|
AfreecaTVUserIE,
|
||||||
|
@ -503,7 +504,6 @@
|
||||||
from .digitalconcerthall import DigitalConcertHallIE
|
from .digitalconcerthall import DigitalConcertHallIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
from .discogs import DiscogsReleasePlaylistIE
|
from .discogs import DiscogsReleasePlaylistIE
|
||||||
from .discovery import DiscoveryIE
|
|
||||||
from .disney import DisneyIE
|
from .disney import DisneyIE
|
||||||
from .dispeak import DigitallySpeakingIE
|
from .dispeak import DigitallySpeakingIE
|
||||||
from .dlf import (
|
from .dlf import (
|
||||||
|
@ -531,16 +531,12 @@
|
||||||
DiscoveryPlusIndiaShowIE,
|
DiscoveryPlusIndiaShowIE,
|
||||||
DiscoveryPlusItalyIE,
|
DiscoveryPlusItalyIE,
|
||||||
DiscoveryPlusItalyShowIE,
|
DiscoveryPlusItalyShowIE,
|
||||||
DIYNetworkIE,
|
|
||||||
DPlayIE,
|
DPlayIE,
|
||||||
FoodNetworkIE,
|
FoodNetworkIE,
|
||||||
GlobalCyclingNetworkPlusIE,
|
|
||||||
GoDiscoveryIE,
|
GoDiscoveryIE,
|
||||||
HGTVDeIE,
|
HGTVDeIE,
|
||||||
HGTVUsaIE,
|
HGTVUsaIE,
|
||||||
InvestigationDiscoveryIE,
|
InvestigationDiscoveryIE,
|
||||||
MotorTrendIE,
|
|
||||||
MotorTrendOnDemandIE,
|
|
||||||
ScienceChannelIE,
|
ScienceChannelIE,
|
||||||
TravelChannelIE,
|
TravelChannelIE,
|
||||||
)
|
)
|
||||||
|
@ -779,6 +775,7 @@
|
||||||
from .goshgay import GoshgayIE
|
from .goshgay import GoshgayIE
|
||||||
from .gotostage import GoToStageIE
|
from .gotostage import GoToStageIE
|
||||||
from .gputechconf import GPUTechConfIE
|
from .gputechconf import GPUTechConfIE
|
||||||
|
from .graspop import GraspopIE
|
||||||
from .gronkh import (
|
from .gronkh import (
|
||||||
GronkhFeedIE,
|
GronkhFeedIE,
|
||||||
GronkhIE,
|
GronkhIE,
|
||||||
|
@ -942,6 +939,7 @@
|
||||||
KhanAcademyUnitIE,
|
KhanAcademyUnitIE,
|
||||||
)
|
)
|
||||||
from .kick import (
|
from .kick import (
|
||||||
|
KickClipIE,
|
||||||
KickIE,
|
KickIE,
|
||||||
KickVODIE,
|
KickVODIE,
|
||||||
)
|
)
|
||||||
|
@ -969,6 +967,10 @@
|
||||||
LA7PodcastEpisodeIE,
|
LA7PodcastEpisodeIE,
|
||||||
LA7PodcastIE,
|
LA7PodcastIE,
|
||||||
)
|
)
|
||||||
|
from .laracasts import (
|
||||||
|
LaracastsIE,
|
||||||
|
LaracastsPlaylistIE,
|
||||||
|
)
|
||||||
from .lastfm import (
|
from .lastfm import (
|
||||||
LastFMIE,
|
LastFMIE,
|
||||||
LastFMPlaylistIE,
|
LastFMPlaylistIE,
|
||||||
|
@ -985,6 +987,7 @@
|
||||||
LcpIE,
|
LcpIE,
|
||||||
LcpPlayIE,
|
LcpPlayIE,
|
||||||
)
|
)
|
||||||
|
from .learningonscreen import LearningOnScreenIE
|
||||||
from .lecture2go import Lecture2GoIE
|
from .lecture2go import Lecture2GoIE
|
||||||
from .lecturio import (
|
from .lecturio import (
|
||||||
LecturioCourseIE,
|
LecturioCourseIE,
|
||||||
|
@ -1113,12 +1116,15 @@
|
||||||
from .melonvod import MelonVODIE
|
from .melonvod import MelonVODIE
|
||||||
from .metacritic import MetacriticIE
|
from .metacritic import MetacriticIE
|
||||||
from .mgtv import MGTVIE
|
from .mgtv import MGTVIE
|
||||||
from .microsoftembed import MicrosoftEmbedIE
|
from .microsoftembed import (
|
||||||
from .microsoftstream import MicrosoftStreamIE
|
MicrosoftBuildIE,
|
||||||
from .microsoftvirtualacademy import (
|
MicrosoftEmbedIE,
|
||||||
MicrosoftVirtualAcademyCourseIE,
|
MicrosoftLearnEpisodeIE,
|
||||||
MicrosoftVirtualAcademyIE,
|
MicrosoftLearnPlaylistIE,
|
||||||
|
MicrosoftLearnSessionIE,
|
||||||
|
MicrosoftMediusIE,
|
||||||
)
|
)
|
||||||
|
from .microsoftstream import MicrosoftStreamIE
|
||||||
from .mildom import (
|
from .mildom import (
|
||||||
MildomClipIE,
|
MildomClipIE,
|
||||||
MildomIE,
|
MildomIE,
|
||||||
|
@ -1603,6 +1609,7 @@
|
||||||
QQMusicPlaylistIE,
|
QQMusicPlaylistIE,
|
||||||
QQMusicSingerIE,
|
QQMusicSingerIE,
|
||||||
QQMusicToplistIE,
|
QQMusicToplistIE,
|
||||||
|
QQMusicVideoIE,
|
||||||
)
|
)
|
||||||
from .r7 import (
|
from .r7 import (
|
||||||
R7IE,
|
R7IE,
|
||||||
|
@ -2164,10 +2171,7 @@
|
||||||
TV5UnisVideoIE,
|
TV5UnisVideoIE,
|
||||||
)
|
)
|
||||||
from .tv24ua import TV24UAVideoIE
|
from .tv24ua import TV24UAVideoIE
|
||||||
from .tva import (
|
from .tva import TVAIE
|
||||||
TVAIE,
|
|
||||||
QubIE,
|
|
||||||
)
|
|
||||||
from .tvanouvelles import (
|
from .tvanouvelles import (
|
||||||
TVANouvellesArticleIE,
|
TVANouvellesArticleIE,
|
||||||
TVANouvellesIE,
|
TVANouvellesIE,
|
||||||
|
@ -2314,6 +2318,7 @@
|
||||||
)
|
)
|
||||||
from .vidlii import VidLiiIE
|
from .vidlii import VidLiiIE
|
||||||
from .vidly import VidlyIE
|
from .vidly import VidlyIE
|
||||||
|
from .vidyard import VidyardIE
|
||||||
from .viewlift import (
|
from .viewlift import (
|
||||||
ViewLiftEmbedIE,
|
ViewLiftEmbedIE,
|
||||||
ViewLiftIE,
|
ViewLiftIE,
|
||||||
|
@ -2379,6 +2384,10 @@
|
||||||
VrtNUIE,
|
VrtNUIE,
|
||||||
)
|
)
|
||||||
from .vtm import VTMIE
|
from .vtm import VTMIE
|
||||||
|
from .vtv import (
|
||||||
|
VTVIE,
|
||||||
|
VTVGoIE,
|
||||||
|
)
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
from .vvvvid import (
|
from .vvvvid import (
|
||||||
VVVVIDIE,
|
VVVVIDIE,
|
||||||
|
|
|
@ -9,12 +9,12 @@
|
||||||
import struct
|
import struct
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import urllib.request
|
|
||||||
import urllib.response
|
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..aes import aes_ecb_decrypt
|
from ..aes import aes_ecb_decrypt
|
||||||
|
from ..networking import RequestHandler, Response
|
||||||
|
from ..networking.exceptions import TransportError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
|
@ -26,37 +26,36 @@
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
from ..utils.networking import clean_proxies
|
|
||||||
|
|
||||||
|
|
||||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
class AbemaLicenseRH(RequestHandler):
|
||||||
"""Add a handler for opening URLs, like _download_webpage"""
|
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
|
||||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
_SUPPORTED_PROXY_SCHEMES = None
|
||||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
_SUPPORTED_FEATURES = None
|
||||||
rh = ydl._request_director.handlers['Urllib']
|
RH_NAME = 'abematv_license'
|
||||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
|
||||||
return
|
|
||||||
headers = ydl.params['http_headers'].copy()
|
|
||||||
proxies = ydl.proxies.copy()
|
|
||||||
clean_proxies(proxies, headers)
|
|
||||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
|
||||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
|
||||||
opener.add_handler(handler)
|
|
||||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
|
||||||
|
|
||||||
|
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
||||||
|
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
||||||
|
|
||||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
|
||||||
handler_order = 499
|
super().__init__(**kwargs)
|
||||||
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
|
|
||||||
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
|
|
||||||
|
|
||||||
def __init__(self, ie: 'AbemaTVIE'):
|
|
||||||
# the protocol that this should really handle is 'abematv-license://'
|
|
||||||
# abematv_license_open is just a placeholder for development purposes
|
|
||||||
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
|
|
||||||
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
|
|
||||||
self.ie = ie
|
self.ie = ie
|
||||||
|
|
||||||
|
def _send(self, request):
|
||||||
|
url = request.url
|
||||||
|
ticket = urllib.parse.urlparse(url).netloc
|
||||||
|
|
||||||
|
try:
|
||||||
|
response_data = self._get_videokey_from_ticket(ticket)
|
||||||
|
except ExtractorError as e:
|
||||||
|
raise TransportError(cause=e.cause) from e
|
||||||
|
except (IndexError, KeyError, TypeError) as e:
|
||||||
|
raise TransportError(cause=repr(e)) from e
|
||||||
|
|
||||||
|
return Response(
|
||||||
|
io.BytesIO(response_data), url,
|
||||||
|
headers={'Content-Length': str(len(response_data))})
|
||||||
|
|
||||||
def _get_videokey_from_ticket(self, ticket):
|
def _get_videokey_from_ticket(self, ticket):
|
||||||
to_show = self.ie.get_param('verbose', False)
|
to_show = self.ie.get_param('verbose', False)
|
||||||
media_token = self.ie._get_media_token(to_show=to_show)
|
media_token = self.ie._get_media_token(to_show=to_show)
|
||||||
|
@ -72,25 +71,17 @@ def _get_videokey_from_ticket(self, ticket):
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
})
|
})
|
||||||
|
|
||||||
res = decode_base_n(license_response['k'], table=self.STRTABLE)
|
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||||
|
|
||||||
h = hmac.new(
|
h = hmac.new(
|
||||||
binascii.unhexlify(self.HKEY),
|
binascii.unhexlify(self._HKEY),
|
||||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||||
digestmod=hashlib.sha256)
|
digestmod=hashlib.sha256)
|
||||||
enckey = bytes_to_intlist(h.digest())
|
enckey = bytes_to_intlist(h.digest())
|
||||||
|
|
||||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||||
|
|
||||||
def abematv_license_open(self, url):
|
|
||||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
|
||||||
ticket = urllib.parse.urlparse(url).netloc
|
|
||||||
response_data = self._get_videokey_from_ticket(ticket)
|
|
||||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
|
||||||
'Content-Length': str(len(response_data)),
|
|
||||||
}, url=url, code=200)
|
|
||||||
|
|
||||||
|
|
||||||
class AbemaTVBaseIE(InfoExtractor):
|
class AbemaTVBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'abematv'
|
_NETRC_MACHINE = 'abematv'
|
||||||
|
@ -139,7 +130,7 @@ def _get_device_token(self):
|
||||||
if self._USERTOKEN:
|
if self._USERTOKEN:
|
||||||
return self._USERTOKEN
|
return self._USERTOKEN
|
||||||
|
|
||||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
|
||||||
|
|
||||||
username, _ = self._get_login_info()
|
username, _ = self._get_login_info()
|
||||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||||
|
@ -368,6 +359,7 @@ def _real_extract(self, url):
|
||||||
info['episode_number'] = epis if epis < 2000 else None
|
info['episode_number'] = epis if epis < 2000 else None
|
||||||
|
|
||||||
is_live, m3u8_url = False, None
|
is_live, m3u8_url = False, None
|
||||||
|
availability = 'public'
|
||||||
if video_type == 'now-on-air':
|
if video_type == 'now-on-air':
|
||||||
is_live = True
|
is_live = True
|
||||||
channel_url = 'https://api.abema.io/v1/channels'
|
channel_url = 'https://api.abema.io/v1/channels'
|
||||||
|
@ -385,10 +377,10 @@ def _real_extract(self, url):
|
||||||
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
|
||||||
note='Checking playability',
|
note='Checking playability',
|
||||||
headers=headers)
|
headers=headers)
|
||||||
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
|
if not traverse_obj(api_response, ('label', 'free', {bool})):
|
||||||
if 3 not in ondemand_types:
|
|
||||||
# cannot acquire decryption key for these streams
|
# cannot acquire decryption key for these streams
|
||||||
self.report_warning('This is a premium-only stream')
|
self.report_warning('This is a premium-only stream')
|
||||||
|
availability = 'premium_only'
|
||||||
info.update(traverse_obj(api_response, {
|
info.update(traverse_obj(api_response, {
|
||||||
'series': ('series', 'title'),
|
'series': ('series', 'title'),
|
||||||
'season': ('season', 'name'),
|
'season': ('season', 'name'),
|
||||||
|
@ -408,6 +400,7 @@ def _real_extract(self, url):
|
||||||
headers=headers)
|
headers=headers)
|
||||||
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
|
||||||
self.report_warning('This is a premium-only stream')
|
self.report_warning('This is a premium-only stream')
|
||||||
|
availability = 'premium_only'
|
||||||
|
|
||||||
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
|
||||||
else:
|
else:
|
||||||
|
@ -425,6 +418,7 @@ def _real_extract(self, url):
|
||||||
'description': description,
|
'description': description,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
|
'availability': availability,
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
|
join_nonempty,
|
||||||
long_to_bytes,
|
long_to_bytes,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
pkcs1pad,
|
pkcs1pad,
|
||||||
|
@ -48,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class ADNIE(ADNBaseIE):
|
class ADNIE(ADNBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9841',
|
'id': '9841',
|
||||||
|
@ -70,10 +71,10 @@ class ADNIE(ADNBaseIE):
|
||||||
},
|
},
|
||||||
'skip': 'Only available in French and German speaking Europe',
|
'skip': 'Only available in French and German speaking Europe',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
|
||||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '23550',
|
'id': '23550',
|
||||||
|
@ -217,7 +218,7 @@ def _real_extract(self, url):
|
||||||
links_data = self._download_json(
|
links_data = self._download_json(
|
||||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||||
'X-Player-Token': authorization,
|
'X-Player-Token': authorization,
|
||||||
'X-Target-Distribution': lang,
|
'X-Target-Distribution': lang or 'fr',
|
||||||
**self._HEADERS,
|
**self._HEADERS,
|
||||||
}, query={
|
}, query={
|
||||||
'freeWithAds': 'true',
|
'freeWithAds': 'true',
|
||||||
|
@ -298,9 +299,9 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class ADNSeasonIE(ADNBaseIE):
|
class ADNSeasonIE(ADNBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
|
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
|
||||||
'playlist_count': 12,
|
'playlist_count': 12,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '911',
|
'id': '911',
|
||||||
|
@ -318,7 +319,7 @@ def _real_extract(self, url):
|
||||||
episodes = self._download_json(
|
episodes = self._download_json(
|
||||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||||
'Downloading episode list', headers={
|
'Downloading episode list', headers={
|
||||||
'X-Target-Distribution': lang,
|
'X-Target-Distribution': lang or 'fr',
|
||||||
**self._HEADERS,
|
**self._HEADERS,
|
||||||
}, query={
|
}, query={
|
||||||
'order': 'asc',
|
'order': 'asc',
|
||||||
|
@ -327,8 +328,8 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
def entries():
|
def entries():
|
||||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||||
yield self.url_result(
|
yield self.url_result(join_nonempty(
|
||||||
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
|
'https://animationdigitalnetwork.com', lang, 'video',
|
||||||
ADNIE, episode_id)
|
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
|
||||||
|
|
||||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import functools
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking import Request
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
|
@ -58,6 +59,13 @@ def _perform_login(self, username, password):
|
||||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||||
expected=True)
|
expected=True)
|
||||||
|
|
||||||
|
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||||
|
return self._download_json(Request(
|
||||||
|
f'https://api.m.afreecatv.com/{endpoint}',
|
||||||
|
data=data, headers=headers, query=query,
|
||||||
|
extensions={'legacy_ssl': True}), display_id,
|
||||||
|
'Downloading API JSON', 'Unable to download API JSON')
|
||||||
|
|
||||||
|
|
||||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||||
IE_NAME = 'afreecatv'
|
IE_NAME = 'afreecatv'
|
||||||
|
@ -72,7 +80,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||||
)\?.*?\bnTitleNo=|
|
)\?.*?\bnTitleNo=|
|
||||||
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
|
||||||
)
|
)
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)/?(?:$|[?#&])
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
|
||||||
|
@ -184,9 +192,9 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
data = self._download_json(
|
data = self._call_api(
|
||||||
'https://api.m.afreecatv.com/station/video/a/view', video_id,
|
'station/video/a/view', video_id, headers={'Referer': url},
|
||||||
headers={'Referer': url}, data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
'nTitleNo': video_id,
|
'nTitleNo': video_id,
|
||||||
'nApiLevel': 10,
|
'nApiLevel': 10,
|
||||||
}))['data']
|
}))['data']
|
||||||
|
@ -253,6 +261,43 @@ def _real_extract(self, url):
|
||||||
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
|
||||||
|
|
||||||
|
|
||||||
|
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
|
||||||
|
IE_NAME = 'afreecatv:catchstory'
|
||||||
|
IE_DESC = 'afreecatv.com catch story'
|
||||||
|
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '103247',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
data = self._call_api(
|
||||||
|
'catchstory/a/view', video_id, headers={'Referer': url},
|
||||||
|
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
|
||||||
|
|
||||||
|
return self.playlist_result(self._entries(data), video_id)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _entries(data):
|
||||||
|
# 'files' is always a list with 1 element
|
||||||
|
yield from traverse_obj(data, (
|
||||||
|
'data', lambda _, v: v['story_type'] == 'catch',
|
||||||
|
'catch_list', lambda _, v: v['files'][0]['file'], {
|
||||||
|
'id': ('files', 0, 'file_info_key', {str}),
|
||||||
|
'url': ('files', 0, 'file', {url_or_none}),
|
||||||
|
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'uploader': ('writer_nick', {str}),
|
||||||
|
'uploader_id': ('writer_id', {str}),
|
||||||
|
'thumbnail': ('thumb', {url_or_none}),
|
||||||
|
'timestamp': ('write_timestamp', {int_or_none}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
|
||||||
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
class AfreecaTVLiveIE(AfreecaTVBaseIE):
|
||||||
IE_NAME = 'afreecatv:live'
|
IE_NAME = 'afreecatv:live'
|
||||||
IE_DESC = 'afreecatv.com livestreams'
|
IE_DESC = 'afreecatv.com livestreams'
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
|
@ -136,7 +137,7 @@ def _real_extract(self, url):
|
||||||
else:
|
else:
|
||||||
vbr = int_or_none(s.get('bitrate'))
|
vbr = int_or_none(s.get('bitrate'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
|
'format_id': join_nonempty(stream_type, vbr),
|
||||||
'vbr': vbr,
|
'vbr': vbr,
|
||||||
'width': int_or_none(s.get('width')),
|
'width': int_or_none(s.get('width')),
|
||||||
'height': int_or_none(s.get('height')),
|
'height': int_or_none(s.get('height')),
|
||||||
|
|
|
@ -131,8 +131,8 @@ def _real_extract(self, url):
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, video_id, f4m_id='hds', fatal=False))
|
href, video_id, f4m_id='hds', fatal=False))
|
||||||
elif mime_type == 'application/dash+xml':
|
elif mime_type == 'application/dash+xml':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
href, video_id, f4m_id='hds', fatal=False))
|
href, video_id, mpd_id='dash', fatal=False))
|
||||||
elif mime_type == 'application/vnd.ms-sstr+xml':
|
elif mime_type == 'application/vnd.ms-sstr+xml':
|
||||||
formats.extend(self._extract_ism_formats(
|
formats.extend(self._extract_ism_formats(
|
||||||
href, video_id, ism_id='mss', fatal=False))
|
href, video_id, ism_id='mss', fatal=False))
|
||||||
|
|
|
@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
|
||||||
]
|
]
|
||||||
_API_BASE = 'https://api.atresplayer.com/'
|
_API_BASE = 'https://api.atresplayer.com/'
|
||||||
|
|
||||||
def _handle_error(self, e, code):
|
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
|
||||||
error = self._parse_json(e.cause.response.read(), None)
|
|
||||||
if error.get('error') == 'required_registered':
|
|
||||||
self.raise_login_required()
|
|
||||||
raise ExtractorError(error['error_description'], expected=True)
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
self._request_webpage(
|
self._request_webpage(
|
||||||
self._API_BASE + 'login', None, 'Downloading login page')
|
self._API_BASE + 'login', None, 'Downloading login page')
|
||||||
|
@ -55,7 +47,9 @@ def _perform_login(self, username, password):
|
||||||
'password': password,
|
'password': password,
|
||||||
}))['targetUrl']
|
}))['targetUrl']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
self._handle_error(e, 400)
|
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||||
|
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
self._request_webpage(target_url, None, 'Following Target URL')
|
self._request_webpage(target_url, None, 'Following Target URL')
|
||||||
|
|
||||||
|
@ -66,7 +60,12 @@ def _real_extract(self, url):
|
||||||
episode = self._download_json(
|
episode = self._download_json(
|
||||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
self._handle_error(e, 403)
|
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||||
|
error = self._parse_json(e.cause.response.read(), None)
|
||||||
|
if error.get('error') == 'required_registered':
|
||||||
|
self.raise_login_required()
|
||||||
|
raise ExtractorError(error['error_description'], expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
title = episode['titulo']
|
title = episode['titulo']
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,13 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
|
determine_ext,
|
||||||
format_field,
|
format_field,
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,6 +34,7 @@ def _extract_playlist(self, playlist_id):
|
||||||
class BanByeIE(BanByeBaseIE):
|
class BanByeIE(BanByeBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -58,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
|
||||||
},
|
},
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 9,
|
||||||
}, {
|
}, {
|
||||||
|
# ['src']['mp4']['levels'] direct mp4 urls only
|
||||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'v_kb6_o1Kyq-CD',
|
'id': 'v_kb6_o1Kyq-CD',
|
||||||
|
@ -77,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
|
||||||
|
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v_a_gPFuC9LoW5',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
|
||||||
|
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
|
||||||
|
'uploader': 'wRealu24',
|
||||||
|
'channel_id': 'ch_wrealu24',
|
||||||
|
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
|
||||||
|
'upload_date': '20231113',
|
||||||
|
'timestamp': 1699874062,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
|
||||||
|
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to download m3u8'],
|
||||||
|
}, {
|
||||||
|
# ['src']['hls']['masterPlaylist'] m3u8 only
|
||||||
|
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v_B0rsKWsr-aaa',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:00b254164b82101b3f9e5326037447ed',
|
||||||
|
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
|
||||||
|
'uploader': 'PSTV Piotr Szlachtowicz ',
|
||||||
|
'channel_id': 'ch_KV9EVObkB9wB',
|
||||||
|
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
|
||||||
|
'upload_date': '20240629',
|
||||||
|
'timestamp': 1719646816,
|
||||||
|
'duration': 2377,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
|
||||||
|
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -91,11 +139,24 @@ def _real_extract(self, url):
|
||||||
'id': f'{quality}p',
|
'id': f'{quality}p',
|
||||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
|
||||||
} for quality in [48, 96, 144, 240, 512, 1080]]
|
} for quality in [48, 96, 144, 240, 512, 1080]]
|
||||||
formats = [{
|
|
||||||
'format_id': f'http-{quality}p',
|
formats = []
|
||||||
'quality': quality,
|
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
|
||||||
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
|
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
|
||||||
} for quality in data['quality']]
|
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||||
|
|
||||||
|
for format_id, format_url in traverse_obj(url_data, (
|
||||||
|
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
is_hls = ext == 'm3u8'
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'ext': 'mp4' if is_hls else ext,
|
||||||
|
'format_id': join_nonempty(is_hls and 'hls', format_id),
|
||||||
|
'protocol': 'm3u8_native' if is_hls else 'https',
|
||||||
|
'height': int_or_none(format_id),
|
||||||
|
})
|
||||||
|
self._remove_duplicate_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -41,7 +41,7 @@ class BandcampIE(InfoExtractor):
|
||||||
'uploader_id': 'youtube-dl',
|
'uploader_id': 'youtube-dl',
|
||||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||||
},
|
},
|
||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song',
|
'skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||||
}, {
|
}, {
|
||||||
# free download
|
# free download
|
||||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
|
|
|
@ -31,12 +31,12 @@
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
|
parse_resolution,
|
||||||
qualities,
|
qualities,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -47,6 +47,23 @@
|
||||||
|
|
||||||
class BilibiliBaseIE(InfoExtractor):
|
class BilibiliBaseIE(InfoExtractor):
|
||||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||||
|
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
|
||||||
|
_wbi_key_cache = {}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_logged_in(self):
|
||||||
|
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
|
||||||
|
|
||||||
|
def _check_missing_formats(self, play_info, formats):
|
||||||
|
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
|
||||||
|
missing_formats = join_nonempty(*[
|
||||||
|
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
|
||||||
|
for fmt in traverse_obj(play_info, (
|
||||||
|
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||||
|
if missing_formats:
|
||||||
|
self.to_screen(
|
||||||
|
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||||
|
f'become a premium member to download them. {self._login_hint()}')
|
||||||
|
|
||||||
def extract_formats(self, play_info):
|
def extract_formats(self, play_info):
|
||||||
format_names = {
|
format_names = {
|
||||||
|
@ -86,18 +103,75 @@ def extract_formats(self, play_info):
|
||||||
'format': format_names.get(video.get('id')),
|
'format': format_names.get(video.get('id')),
|
||||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||||
|
|
||||||
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
|
if formats:
|
||||||
if missing_formats:
|
self._check_missing_formats(play_info, formats)
|
||||||
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
|
|
||||||
f'you have to login or become premium member to download them. {self._login_hint()}')
|
|
||||||
|
|
||||||
|
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
|
||||||
|
'url': ('url', {url_or_none}),
|
||||||
|
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
'filesize': ('size', {int_or_none}),
|
||||||
|
}))
|
||||||
|
if fragments:
|
||||||
|
formats.append({
|
||||||
|
'url': fragments[0]['url'],
|
||||||
|
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
|
||||||
|
**({
|
||||||
|
'fragments': fragments,
|
||||||
|
'protocol': 'http_dash_segments',
|
||||||
|
} if len(fragments) > 1 else {}),
|
||||||
|
**traverse_obj(play_info, {
|
||||||
|
'quality': ('quality', {int_or_none}),
|
||||||
|
'format_id': ('quality', {str_or_none}),
|
||||||
|
'format_note': ('quality', {lambda x: format_names.get(x)}),
|
||||||
|
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
}),
|
||||||
|
**parse_resolution(format_names.get(play_info.get('quality'))),
|
||||||
|
})
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _download_playinfo(self, video_id, cid, headers=None):
|
def _get_wbi_key(self, video_id):
|
||||||
|
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
|
||||||
|
return self._wbi_key_cache['key']
|
||||||
|
|
||||||
|
session_data = self._download_json(
|
||||||
|
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
|
||||||
|
|
||||||
|
lookup = ''.join(traverse_obj(session_data, (
|
||||||
|
'data', 'wbi_img', ('img_url', 'sub_url'),
|
||||||
|
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
|
||||||
|
|
||||||
|
# from getMixinKey() in the vendor js
|
||||||
|
mixin_key_enc_tab = [
|
||||||
|
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
|
||||||
|
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
|
||||||
|
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
|
||||||
|
36, 20, 34, 44, 52,
|
||||||
|
]
|
||||||
|
|
||||||
|
self._wbi_key_cache.update({
|
||||||
|
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
|
||||||
|
'ts': time.time(),
|
||||||
|
})
|
||||||
|
return self._wbi_key_cache['key']
|
||||||
|
|
||||||
|
def _sign_wbi(self, params, video_id):
|
||||||
|
params['wts'] = round(time.time())
|
||||||
|
params = {
|
||||||
|
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
|
||||||
|
for k, v in sorted(params.items())
|
||||||
|
}
|
||||||
|
query = urllib.parse.urlencode(params)
|
||||||
|
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||||
|
return params
|
||||||
|
|
||||||
|
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||||
|
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||||
|
if qn:
|
||||||
|
params['qn'] = qn
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
query=self._sign_wbi(params, bvid), headers=headers,
|
||||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||||
|
|
||||||
def json2srt(self, json_data):
|
def json2srt(self, json_data):
|
||||||
srt_data = ''
|
srt_data = ''
|
||||||
|
@ -115,15 +189,15 @@ def _get_subtitles(self, video_id, cid, aid=None):
|
||||||
}],
|
}],
|
||||||
}
|
}
|
||||||
|
|
||||||
subtitle_info = traverse_obj(self._download_json(
|
video_info = self._download_json(
|
||||||
'https://api.bilibili.com/x/player/v2', video_id,
|
'https://api.bilibili.com/x/player/v2', video_id,
|
||||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||||
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
|
note=f'Extracting subtitle info {cid}')
|
||||||
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
|
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||||
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
|
self.report_warning(
|
||||||
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
|
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
|
||||||
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
|
for s in traverse_obj(video_info, (
|
||||||
for s in subs_list:
|
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
|
||||||
subtitles.setdefault(s['lan'], []).append({
|
subtitles.setdefault(s['lan'], []).append({
|
||||||
'ext': 'srt',
|
'ext': 'srt',
|
||||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||||
|
@ -203,15 +277,15 @@ def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None
|
||||||
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
|
||||||
return cid_edges
|
return cid_edges
|
||||||
|
|
||||||
def _get_interactive_entries(self, video_id, cid, metainfo):
|
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||||
graph_version = traverse_obj(
|
graph_version = traverse_obj(
|
||||||
self._download_json(
|
self._download_json(
|
||||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||||
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
|
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
|
||||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||||
for cid, edges in cid_edges.items():
|
for cid, edges in cid_edges.items():
|
||||||
play_info = self._download_playinfo(video_id, cid)
|
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||||
yield {
|
yield {
|
||||||
**metainfo,
|
**metainfo,
|
||||||
'id': f'{video_id}_{cid}',
|
'id': f'{video_id}_{cid}',
|
||||||
|
@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'timestamp': 1488353834,
|
'timestamp': 1488353834,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'old av URL version',
|
'note': 'old av URL version',
|
||||||
'url': 'http://www.bilibili.com/video/av1074402/',
|
'url': 'http://www.bilibili.com/video/av1074402/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
'id': 'BV11x411K7CN',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': '【金坷垃】金泡沫',
|
||||||
'uploader': '菊子桑',
|
'uploader': '菊子桑',
|
||||||
'uploader_id': '156160',
|
'uploader_id': '156160',
|
||||||
'id': 'BV11x411K7CN',
|
|
||||||
'title': '【金坷垃】金泡沫',
|
|
||||||
'duration': 308.36,
|
'duration': 308.36,
|
||||||
'upload_date': '20140420',
|
'upload_date': '20140420',
|
||||||
'timestamp': 1397983878,
|
'timestamp': 1397983878,
|
||||||
|
@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'tags': list,
|
'tags': list,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
|
||||||
|
'_old_archive_ids': ['bilibili 1074402_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -288,6 +364,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||||
'duration': 90.314,
|
'duration': 90.314,
|
||||||
|
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
|
@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||||
'duration': 90.314,
|
'duration': 90.314,
|
||||||
|
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'note': 'video has subtitles',
|
|
||||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'BV12N4y1M7rh',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
|
||||||
'tags': list,
|
|
||||||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
|
||||||
'duration': 313.557,
|
|
||||||
'upload_date': '20220709',
|
|
||||||
'uploader': '小夫太渴',
|
|
||||||
'timestamp': 1657347907,
|
|
||||||
'uploader_id': '1326814124',
|
|
||||||
'comment_count': int,
|
|
||||||
'view_count': int,
|
|
||||||
'like_count': int,
|
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
|
||||||
'subtitles': 'count:2',
|
|
||||||
},
|
|
||||||
'params': {'listsubtitles': True},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/video/av8903802/',
|
'url': 'https://www.bilibili.com/video/av8903802/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'_old_archive_ids': ['bilibili 8903802_part1'],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 463665680_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
}, {
|
}, {
|
||||||
|
@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 893839363_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'newer festival video',
|
'note': 'newer festival video',
|
||||||
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||||
|
@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 778246196_part1'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'legacy flv/mp4 video',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1ms411Q7vw_p4',
|
||||||
|
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||||
|
'timestamp': 1458222815,
|
||||||
|
'upload_date': '20160317',
|
||||||
|
'description': '云南方言快乐生产线出品',
|
||||||
|
'duration': float,
|
||||||
|
'uploader': '一笑颠天',
|
||||||
|
'uploader_id': '3916081',
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': list,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 4120229_part4'],
|
||||||
|
},
|
||||||
|
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
|
||||||
|
'playlist_count': 19,
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1ms411Q7vw_p4_0',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
|
||||||
|
'duration': 399.102,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'note': 'legacy mp4-only video',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1nx411u79K',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
|
||||||
|
'timestamp': 1508893551,
|
||||||
|
'upload_date': '20171025',
|
||||||
|
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
|
||||||
|
'duration': 80.384,
|
||||||
|
'uploader': '伯远',
|
||||||
|
'uploader_id': '10584494',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': list,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 15700301_part1'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'interactive/split-path video',
|
'note': 'interactive/split-path video',
|
||||||
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
|
||||||
|
@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||||
},
|
},
|
||||||
'playlist_count': 33,
|
'playlist_count': 33,
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
|
@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'_old_archive_ids': ['bilibili 292734508_part1'],
|
||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
}, {
|
}, {
|
||||||
|
@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'upload_date': '20191021',
|
'upload_date': '20191021',
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'note': 'video has subtitles, which requires login',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV12N4y1M7rh',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
|
||||||
|
'tags': list,
|
||||||
|
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||||
|
'duration': 313.557,
|
||||||
|
'upload_date': '20220709',
|
||||||
|
'uploader': '小夫太渴',
|
||||||
|
'timestamp': 1657347907,
|
||||||
|
'uploader_id': '1326814124',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
'subtitles': 'count:2', # login required for CC subtitle
|
||||||
|
'_old_archive_ids': ['bilibili 898179753_part1'],
|
||||||
|
},
|
||||||
|
'params': {'listsubtitles': True},
|
||||||
|
'skip': 'login required for subtitle',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -498,8 +631,9 @@ def _real_extract(self, url):
|
||||||
if not self._match_valid_url(urlh.url):
|
if not self._match_valid_url(urlh.url):
|
||||||
return self.url_result(urlh.url)
|
return self.url_result(urlh.url)
|
||||||
|
|
||||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
headers['Referer'] = url
|
||||||
|
|
||||||
|
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||||
is_festival = 'videoData' not in initial_state
|
is_festival = 'videoData' not in initial_state
|
||||||
if is_festival:
|
if is_festival:
|
||||||
video_data = initial_state['videoInfo']
|
video_data = initial_state['videoInfo']
|
||||||
|
@ -548,7 +682,6 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
aid = video_data.get('aid')
|
aid = video_data.get('aid')
|
||||||
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
|
||||||
|
|
||||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||||
|
|
||||||
festival_info = {}
|
festival_info = {}
|
||||||
|
@ -586,16 +719,63 @@ def _real_extract(self, url):
|
||||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||||
if is_interactive:
|
if is_interactive:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
|
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||||
__post_extractor=self.extract_comments(aid))
|
__post_extractor=self.extract_comments(aid))
|
||||||
else:
|
else:
|
||||||
|
formats = self.extract_formats(play_info)
|
||||||
|
|
||||||
|
if not traverse_obj(play_info, ('dash')):
|
||||||
|
# we only have legacy formats and need additional work
|
||||||
|
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||||
|
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||||
|
formats.extend(traverse_obj(
|
||||||
|
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||||
|
lambda _, v: not has_qn(v['quality'])))
|
||||||
|
self._check_missing_formats(play_info, formats)
|
||||||
|
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||||
|
if flv_formats and len(flv_formats) < len(formats):
|
||||||
|
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||||
|
if not self._configuration_arg('prefer_multi_flv'):
|
||||||
|
dropped_fmts = ', '.join(
|
||||||
|
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||||
|
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||||
|
if dropped_fmts:
|
||||||
|
self.to_screen(
|
||||||
|
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||||
|
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||||
|
else:
|
||||||
|
formats = traverse_obj(
|
||||||
|
# XXX: Filtering by extractor-arg is for testing purposes
|
||||||
|
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||||
|
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||||
|
|
||||||
|
if traverse_obj(formats, (0, 'fragments')):
|
||||||
|
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||||
|
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||||
return {
|
return {
|
||||||
**metainfo,
|
**metainfo,
|
||||||
|
'_type': 'multi_video',
|
||||||
|
'entries': [{
|
||||||
|
'id': f'{metainfo["id"]}_{idx}',
|
||||||
|
'title': metainfo['title'],
|
||||||
|
'http_headers': metainfo['http_headers'],
|
||||||
|
'formats': [{
|
||||||
|
**fragment,
|
||||||
|
'format_id': formats[0].get('format_id'),
|
||||||
|
}],
|
||||||
|
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||||
|
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||||
|
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||||
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
**metainfo,
|
||||||
|
'formats': formats,
|
||||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
'chapters': self._get_chapters(aid, cid),
|
'chapters': self._get_chapters(aid, cid),
|
||||||
'subtitles': self.extract_subtitles(video_id, cid),
|
'subtitles': self.extract_subtitles(video_id, cid),
|
||||||
'formats': self.extract_formats(play_info),
|
|
||||||
'__post_extractor': self.extract_comments(aid),
|
'__post_extractor': self.extract_comments(aid),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -968,7 +1148,7 @@ def _real_extract(self, url):
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
|
||||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
class BilibiliSpaceBaseIE(BilibiliBaseIE):
|
||||||
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
|
||||||
first_page = fetch_page(0)
|
first_page = fetch_page(0)
|
||||||
metadata = get_metadata(first_page)
|
metadata = get_metadata(first_page)
|
||||||
|
@ -988,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||||
'id': '3985676',
|
'id': '3985676',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 178,
|
'playlist_mincount': 178,
|
||||||
|
'skip': 'login required',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://space.bilibili.com/313580179/video',
|
'url': 'https://space.bilibili.com/313580179/video',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '313580179',
|
'id': '313580179',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 92,
|
'playlist_mincount': 92,
|
||||||
|
'skip': 'login required',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_signature(self, playlist_id):
|
|
||||||
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
|
|
||||||
|
|
||||||
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
|
|
||||||
img_key = traverse_obj(
|
|
||||||
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
|
|
||||||
sub_key = traverse_obj(
|
|
||||||
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
|
|
||||||
|
|
||||||
session_key = img_key + sub_key
|
|
||||||
|
|
||||||
signature_values = []
|
|
||||||
for position in (
|
|
||||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
|
|
||||||
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
|
|
||||||
57, 62, 11, 36, 20, 34, 44, 52,
|
|
||||||
):
|
|
||||||
char_at_position = try_call(lambda: session_key[position])
|
|
||||||
if char_at_position:
|
|
||||||
signature_values.append(char_at_position)
|
|
||||||
|
|
||||||
return ''.join(signature_values)[:32]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
|
||||||
if not is_video_url:
|
if not is_video_url:
|
||||||
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
|
||||||
'To download audios, add a "/audio" to the URL')
|
'To download audios, add a "/audio" to the URL')
|
||||||
|
|
||||||
signature = self._extract_signature(playlist_id)
|
|
||||||
|
|
||||||
def fetch_page(page_idx):
|
def fetch_page(page_idx):
|
||||||
query = {
|
query = {
|
||||||
'keyword': '',
|
'keyword': '',
|
||||||
'mid': playlist_id,
|
'mid': playlist_id,
|
||||||
'order': 'pubdate',
|
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
|
||||||
'order_avoided': 'true',
|
'order_avoided': 'true',
|
||||||
'platform': 'web',
|
'platform': 'web',
|
||||||
'pn': page_idx + 1,
|
'pn': page_idx + 1,
|
||||||
'ps': 30,
|
'ps': 30,
|
||||||
'tid': 0,
|
'tid': 0,
|
||||||
'web_location': 1550101,
|
'web_location': 1550101,
|
||||||
'wts': int(time.time()),
|
|
||||||
}
|
}
|
||||||
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
response = self._download_json(
|
||||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
|
||||||
headers={'referer': url})
|
query=self._sign_wbi(query, playlist_id),
|
||||||
|
note=f'Downloading space page {page_idx}', headers={'Referer': url})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||||
raise
|
raise
|
||||||
if response['code'] in (-352, -401):
|
status_code = response['code']
|
||||||
|
if status_code == -401:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
f'Request is blocked by server ({-response["code"]}), '
|
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||||
'please add cookies, wait and try later.', expected=True)
|
elif status_code == -352 and not self.is_logged_in:
|
||||||
|
self.raise_login_required('Request is rejected, you need to login to access playlist')
|
||||||
|
elif status_code != 0:
|
||||||
|
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
|
||||||
return response['data']
|
return response['data']
|
||||||
|
|
||||||
def get_metadata(page_data):
|
def get_metadata(page_data):
|
||||||
|
@ -1280,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/watchlater/#/list',
|
'url': 'https://www.bilibili.com/watchlater/#/list',
|
||||||
'info_dict': {'id': 'watchlater'},
|
'info_dict': {
|
||||||
|
'id': r're:\d+',
|
||||||
|
'title': '稍后再看',
|
||||||
|
},
|
||||||
'playlist_mincount': 0,
|
'playlist_mincount': 0,
|
||||||
'skip': 'login required',
|
'skip': 'login required',
|
||||||
}]
|
}]
|
||||||
|
@ -1356,14 +1519,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||||
'skip': 'redirect url',
|
'skip': 'redirect url',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/list/watchlater',
|
'url': 'https://www.bilibili.com/list/watchlater',
|
||||||
'info_dict': {'id': 'watchlater'},
|
'info_dict': {
|
||||||
|
'id': r're:2_\d+',
|
||||||
|
'title': '稍后再看',
|
||||||
|
'uploader': str,
|
||||||
|
'uploader_id': str,
|
||||||
|
},
|
||||||
'playlist_mincount': 0,
|
'playlist_mincount': 0,
|
||||||
'skip': 'login required',
|
'skip': 'login required',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
'url': 'https://www.bilibili.com/medialist/play/watchlater',
|
||||||
'info_dict': {'id': 'watchlater'},
|
'info_dict': {'id': 'watchlater'},
|
||||||
'playlist_mincount': 0,
|
'playlist_mincount': 0,
|
||||||
'skip': 'login required',
|
'skip': 'redirect url & login required',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_medialist(self, query, list_id):
|
def _extract_medialist(self, query, list_id):
|
||||||
|
@ -1414,7 +1582,7 @@ def _real_extract(self, url):
|
||||||
'title': ('title', {str}),
|
'title': ('title', {str}),
|
||||||
'uploader': ('upper', 'name', {str}),
|
'uploader': ('upper', 'name', {str}),
|
||||||
'uploader_id': ('upper', 'mid', {str_or_none}),
|
'uploader_id': ('upper', 'mid', {str_or_none}),
|
||||||
'timestamp': ('ctime', {int_or_none}),
|
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
|
||||||
'thumbnail': ('cover', {url_or_none}),
|
'thumbnail': ('cover', {url_or_none}),
|
||||||
})),
|
})),
|
||||||
}
|
}
|
||||||
|
@ -1808,7 +1976,8 @@ def _perform_login(self, username, password):
|
||||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||||
login_post = self._download_json(
|
login_post = self._download_json(
|
||||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
|
||||||
|
data=urlencode_postdata({
|
||||||
'username': username,
|
'username': username,
|
||||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||||
'keep_me': 'true',
|
'keep_me': 'true',
|
||||||
|
@ -2140,7 +2309,8 @@ def _entries(self, series_id):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
series_id = self._match_id(url)
|
series_id = self._match_id(url)
|
||||||
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
series_info = self._call_api(
|
||||||
|
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
|
||||||
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
|
|
||||||
|
|
||||||
class BitChuteIE(InfoExtractor):
|
class BitChuteIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||||
|
@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
|
@ -132,7 +135,7 @@ def _make_url(html):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||||
|
|
||||||
self._raise_if_restricted(webpage)
|
self._raise_if_restricted(webpage)
|
||||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||||
|
@ -171,13 +174,13 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class BitChuteChannelIE(InfoExtractor):
|
class BitChuteChannelIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bitchute.com/channel/bitchute/',
|
'url': 'https://www.bitchute.com/channel/bitchute/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bitchute',
|
'id': 'bitchute',
|
||||||
'title': 'BitChute',
|
'title': 'BitChute',
|
||||||
'description': 'md5:5329fb3866125afa9446835594a9b138',
|
'description': 'md5:2134c37d64fc3a4846787c402956adac',
|
||||||
},
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
|
@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||||
|
@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _make_url(playlist_id, playlist_type):
|
def _make_url(playlist_id, playlist_type):
|
||||||
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
|
return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
|
||||||
|
|
||||||
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
def _fetch_page(self, playlist_id, playlist_type, page_num):
|
||||||
playlist_url = self._make_url(playlist_id, playlist_type)
|
playlist_url = self._make_url(playlist_id, playlist_type)
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
|
|
||||||
|
|
||||||
class BoxIE(InfoExtractor):
|
class BoxIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||||
|
@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
|
||||||
'uploader_id': '239068974',
|
'uploader_id': '239068974',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'dash fragment too small'},
|
'params': {'skip_download': 'dash fragment too small'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1536173056065',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader_id': '18523128264',
|
||||||
|
'uploader': 'Lexi Hennigan',
|
||||||
|
'title': 'iPSC Symposium recording part 1.mp4',
|
||||||
|
'timestamp': 1716228343,
|
||||||
|
'upload_date': '20240520',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'dash fragment too small'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
shared_name, file_id = self._match_valid_url(url).groups()
|
shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
|
||||||
webpage = self._download_webpage(url, file_id or shared_name)
|
webpage = self._download_webpage(url, file_id or shared_name)
|
||||||
|
|
||||||
if not file_id:
|
if not file_id:
|
||||||
|
@ -57,14 +69,14 @@ def _real_extract(self, url):
|
||||||
request_token = self._search_json(
|
request_token = self._search_json(
|
||||||
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||||
access_token = self._download_json(
|
access_token = self._download_json(
|
||||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||||
'Downloading token JSON metadata',
|
'Downloading token JSON metadata',
|
||||||
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'X-Request-Token': request_token,
|
'X-Request-Token': request_token,
|
||||||
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||||
})[file_id]['read']
|
})[file_id]['read']
|
||||||
shared_link = 'https://app.box.com/s/' + shared_name
|
shared_link = f'https://{service}.box.com/s/{shared_name}'
|
||||||
f = self._download_json(
|
f = self._download_json(
|
||||||
'https://api.box.com/2.0/files/' + file_id, file_id,
|
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||||
'Downloading file JSON metadata', headers={
|
'Downloading file JSON metadata', headers={
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
fix_xml_ampersands,
|
fix_xml_ampersands,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
@ -386,7 +387,7 @@ def _build_brightcove_url_from_js(cls, object_js):
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_brightcove_url(cls, params):
|
def _make_brightcove_url(cls, params):
|
||||||
return update_url_query(
|
return update_url_query(
|
||||||
'http://c.brightcove.com/services/viewer/htmlFederated', params)
|
'https://c.brightcove.com/services/viewer/htmlFederated', params)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _extract_brightcove_url(cls, webpage):
|
def _extract_brightcove_url(cls, webpage):
|
||||||
|
@ -470,7 +471,7 @@ def _real_extract(self, url):
|
||||||
if referer:
|
if referer:
|
||||||
headers['Referer'] = referer
|
headers['Referer'] = referer
|
||||||
player_page = self._download_webpage(
|
player_page = self._download_webpage(
|
||||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
'https://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||||
video_id, headers=headers, fatal=False)
|
video_id, headers=headers, fatal=False)
|
||||||
if player_page:
|
if player_page:
|
||||||
player_key = self._search_regex(
|
player_key = self._search_regex(
|
||||||
|
@ -480,7 +481,7 @@ def _real_extract(self, url):
|
||||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||||
if publisher_id:
|
if publisher_id:
|
||||||
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
|
||||||
if referer:
|
if referer:
|
||||||
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
|
||||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||||
|
@ -538,12 +539,7 @@ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||||
})
|
})
|
||||||
|
|
||||||
def build_format_id(kind):
|
def build_format_id(kind):
|
||||||
format_id = kind
|
return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
|
||||||
if tbr:
|
|
||||||
format_id += f'-{int(tbr)}k'
|
|
||||||
if height:
|
|
||||||
format_id += f'-{height}p'
|
|
||||||
return format_id
|
|
||||||
|
|
||||||
if src or streaming_src:
|
if src or streaming_src:
|
||||||
f.update({
|
f.update({
|
||||||
|
@ -801,7 +797,7 @@ def _extract_brightcove_urls(ie, webpage):
|
||||||
# Look for iframe embeds [1]
|
# Look for iframe embeds [1]
|
||||||
for _, url in re.findall(
|
for _, url in re.findall(
|
||||||
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
|
||||||
entries.append(url if url.startswith('http') else 'http:' + url)
|
entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
|
||||||
|
|
||||||
# Look for <video> tags [2] and embed_in_page embeds [3]
|
# Look for <video> tags [2] and embed_in_page embeds [3]
|
||||||
# [2] looks like:
|
# [2] looks like:
|
||||||
|
@ -830,7 +826,7 @@ def _extract_brightcove_urls(ie, webpage):
|
||||||
player_id = player_id or attrs.get('data-player') or 'default'
|
player_id = player_id or attrs.get('data-player') or 'default'
|
||||||
embed = embed or attrs.get('data-embed') or 'default'
|
embed = embed or attrs.get('data-embed') or 'default'
|
||||||
|
|
||||||
bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
|
||||||
|
|
||||||
# Some brightcove videos may be embedded with video tag only and
|
# Some brightcove videos may be embedded with video tag only and
|
||||||
# without script tag or any mentioning of brightcove at all. Such
|
# without script tag or any mentioning of brightcove at all. Such
|
||||||
|
@ -867,7 +863,7 @@ def _real_extract(self, url):
|
||||||
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
|
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
|
||||||
|
|
||||||
def extract_policy_key():
|
def extract_policy_key():
|
||||||
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
|
||||||
config = self._download_json(
|
config = self._download_json(
|
||||||
base_url + 'config.json', video_id, fatal=False) or {}
|
base_url + 'config.json', video_id, fatal=False) or {}
|
||||||
policy_key = try_get(
|
policy_key = try_get(
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import base64
|
import base64
|
||||||
|
import functools
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
@ -6,17 +7,24 @@
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking import HEADRequest
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
replace_extension,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
|
update_url,
|
||||||
|
url_basename,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -149,6 +157,7 @@ def _real_extract(self, url):
|
||||||
class CBCPlayerIE(InfoExtractor):
|
class CBCPlayerIE(InfoExtractor):
|
||||||
IE_NAME = 'cbc.ca:player'
|
IE_NAME = 'cbc.ca:player'
|
||||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||||
|
_GEO_COUNTRIES = ['CA']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||||
|
@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
|
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
|
||||||
'timestamp': 1425704400,
|
'timestamp': 1425704400,
|
||||||
'upload_date': '20150307',
|
'upload_date': '20150307',
|
||||||
'uploader': 'CBCC-NEW',
|
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
|
||||||
'chapters': [],
|
'chapters': [],
|
||||||
'duration': 494.811,
|
'duration': 494.811,
|
||||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
'categories': ['All in a Weekend Montreal'],
|
||||||
'tags': 'count:8',
|
'tags': 'count:11',
|
||||||
'location': 'Quebec',
|
'location': 'Quebec',
|
||||||
'series': 'All in a Weekend Montreal',
|
'series': 'All in a Weekend Montreal',
|
||||||
'season': 'Season 2015',
|
'season': 'Season 2015',
|
||||||
'season_number': 2015,
|
'season_number': 2015,
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
|
'genres': ['Other'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
|
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
|
||||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2164402062',
|
'id': '2164402062',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||||
'timestamp': 1320410746,
|
'timestamp': 1320410746,
|
||||||
'upload_date': '20111104',
|
'upload_date': '20111104',
|
||||||
'uploader': 'CBCC-NEW',
|
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
|
||||||
'chapters': [],
|
'chapters': [],
|
||||||
'duration': 186.867,
|
'duration': 186.867,
|
||||||
'series': 'CBC News: Windsor at 6:00',
|
'series': 'CBC News: Windsor at 6:00',
|
||||||
'categories': ['News/Canada/Windsor'],
|
'categories': ['Windsor'],
|
||||||
'location': 'Windsor',
|
'location': 'Windsor',
|
||||||
'tags': ['cancer'],
|
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||||
'creators': ['Allison Johnson'],
|
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
|
'genres': ['News'],
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||||
'url': 'https://www.cbc.ca/player/play/1.2985700',
|
'url': 'https://www.cbc.ca/player/play/1.2985700',
|
||||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2657631896',
|
'id': '1.2985700',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||||
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
|
||||||
'timestamp': 1425704400,
|
'timestamp': 1425704400,
|
||||||
'upload_date': '20150307',
|
'upload_date': '20150307',
|
||||||
'uploader': 'CBCC-NEW',
|
'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
|
||||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
|
||||||
'chapters': [],
|
'chapters': [],
|
||||||
'duration': 494.811,
|
'duration': 494.811,
|
||||||
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
'categories': ['All in a Weekend Montreal'],
|
||||||
'tags': 'count:8',
|
'tags': 'count:11',
|
||||||
'location': 'Quebec',
|
'location': 'Quebec',
|
||||||
'series': 'All in a Weekend Montreal',
|
'series': 'All in a Weekend Montreal',
|
||||||
'season': 'Season 2015',
|
'season': 'Season 2015',
|
||||||
'season_number': 2015,
|
'season_number': 2015,
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
|
'genres': ['Other'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cbc.ca/player/play/1.1711287',
|
'url': 'https://www.cbc.ca/player/play/1.1711287',
|
||||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2164402062',
|
'id': '1.1711287',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Cancer survivor four times over',
|
'title': 'Cancer survivor four times over',
|
||||||
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||||
'timestamp': 1320410746,
|
'timestamp': 1320410746,
|
||||||
'upload_date': '20111104',
|
'upload_date': '20111104',
|
||||||
'uploader': 'CBCC-NEW',
|
'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
|
||||||
'chapters': [],
|
'chapters': [],
|
||||||
'duration': 186.867,
|
'duration': 186.867,
|
||||||
'series': 'CBC News: Windsor at 6:00',
|
'series': 'CBC News: Windsor at 6:00',
|
||||||
'categories': ['News/Canada/Windsor'],
|
'categories': ['Windsor'],
|
||||||
'location': 'Windsor',
|
'location': 'Windsor',
|
||||||
'tags': ['cancer'],
|
'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
|
||||||
'creators': ['Allison Johnson'],
|
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
|
'genres': ['News'],
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
# Has subtitles
|
# Has subtitles
|
||||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||||
'url': 'https://www.cbc.ca/player/play/1.7159484',
|
'url': 'https://www.cbc.ca/player/play/video/9.6424403',
|
||||||
'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
|
'md5': '8025909eaffcf0adf59922904def9a5e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2324213316001',
|
'id': '9.6424403',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The National | School boards sue social media giants',
|
'title': 'The National | N.W.T. wildfire emergency',
|
||||||
'description': 'md5:4b4db69322fa32186c3ce426da07402c',
|
'description': 'md5:ada33d36d1df69347ed575905bfd496c',
|
||||||
'timestamp': 1711681200,
|
'timestamp': 1718589600,
|
||||||
'duration': 2743.400,
|
'duration': 2692.833,
|
||||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
'subtitles': {
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
|
'en-US': [{
|
||||||
'uploader': 'CBCC-NEW',
|
'name': 'English Captions',
|
||||||
|
'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
|
||||||
'chapters': 'count:5',
|
'chapters': 'count:5',
|
||||||
'upload_date': '20240329',
|
'upload_date': '20240617',
|
||||||
'categories': 'count:4',
|
'categories': ['News', 'The National', 'The National Latest Broadcasts'],
|
||||||
'series': 'The National - Full Show',
|
'series': 'The National - Full Show',
|
||||||
'tags': 'count:1',
|
'tags': ['The National'],
|
||||||
'creators': ['News'],
|
|
||||||
'location': 'Canada',
|
'location': 'Canada',
|
||||||
'media_type': 'Full Program',
|
'media_type': 'Full Program',
|
||||||
|
'genres': ['News'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||||
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2334524995812',
|
'id': '1.7194274',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||||
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||||
'timestamp': 1714788791,
|
'timestamp': 1714788791,
|
||||||
'duration': 77.678,
|
'duration': 77.678,
|
||||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
|
||||||
'uploader': 'CBCC-NEW',
|
'chapters': [],
|
||||||
'chapters': 'count:0',
|
|
||||||
'upload_date': '20240504',
|
|
||||||
'categories': 'count:3',
|
'categories': 'count:3',
|
||||||
'series': 'The National',
|
'series': 'The National',
|
||||||
'tags': 'count:15',
|
'tags': 'count:17',
|
||||||
'creators': ['encoder'],
|
|
||||||
'location': 'Canada',
|
'location': 'Canada',
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
|
'upload_date': '20240504',
|
||||||
|
'genres': ['News'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cbc.ca/player/play/video/9.6427282',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9.6427282',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Men\'s Soccer - Argentina vs Morocco',
|
||||||
|
'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
|
||||||
|
'series': 'CBC Sports',
|
||||||
|
'media_type': 'Event Coverage',
|
||||||
|
'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
|
||||||
|
'timestamp': 1721825400.0,
|
||||||
|
'upload_date': '20240724',
|
||||||
|
'duration': 10568.0,
|
||||||
|
'chapters': [],
|
||||||
|
'genres': [],
|
||||||
|
'tags': ['2024 Paris Olympic Games'],
|
||||||
|
'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
|
||||||
|
'location': 'Canada',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cbc.ca/player/play/video/9.6459530',
|
||||||
|
'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9.6459530',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Parts of Jasper incinerated as wildfire rages',
|
||||||
|
'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
|
||||||
|
'series': 'The National',
|
||||||
|
'media_type': 'Excerpt',
|
||||||
|
'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
|
||||||
|
'timestamp': 1721964091.012,
|
||||||
|
'upload_date': '20240726',
|
||||||
|
'duration': 952.285,
|
||||||
|
'chapters': [],
|
||||||
|
'genres': [],
|
||||||
|
'tags': 'count:23',
|
||||||
|
'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cbc.ca/player/play/video/9.6420651',
|
||||||
|
'md5': '71a850c2c6ee5e912de169f5311bb533',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9.6420651',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
|
||||||
|
'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
|
||||||
|
'series': 'CBC News Edmonton',
|
||||||
|
'media_type': 'Excerpt',
|
||||||
|
'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
|
||||||
|
'timestamp': 1718220065.768,
|
||||||
|
'upload_date': '20240612',
|
||||||
|
'duration': 286.086,
|
||||||
|
'chapters': [],
|
||||||
|
'genres': ['News'],
|
||||||
|
'categories': ['News', 'Edmonton'],
|
||||||
|
'tags': 'count:7',
|
||||||
|
'location': 'Edmonton',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'cbcplayer:1.7159484',
|
'url': 'cbcplayer:1.7159484',
|
||||||
|
@ -307,25 +376,115 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _parse_param(self, asset_data, name):
|
||||||
|
return traverse_obj(asset_data, ('params', lambda _, v: v['name'] == name, 'value', {str}, any))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
if '.' in video_id:
|
|
||||||
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
||||||
video_id = self._search_json(
|
data = self._search_json(
|
||||||
r'window\.__INITIAL_STATE__\s*=', webpage,
|
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
|
||||||
'initial state', video_id)['video']['currentClip']['mediaId']
|
assets = traverse_obj(
|
||||||
|
data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))
|
||||||
|
|
||||||
|
if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
|
||||||
|
# XXX: Deprecated; CBC is migrating off of ThePlatform
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(
|
||||||
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', {
|
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
|
||||||
'force_smil_url': True,
|
'force_smil_url': True,
|
||||||
}),
|
}),
|
||||||
'id': video_id,
|
'id': media_id,
|
||||||
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
|
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
|
||||||
}
|
}
|
||||||
|
|
||||||
|
is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
|
||||||
|
for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
|
||||||
|
subtitles.setdefault(sub.get('language') or 'und', []).append({
|
||||||
|
'url': sub['src'],
|
||||||
|
'name': sub.get('label'),
|
||||||
|
})
|
||||||
|
|
||||||
|
for asset in assets:
|
||||||
|
asset_key = asset['key']
|
||||||
|
asset_type = asset['type']
|
||||||
|
if asset_type != 'medianet':
|
||||||
|
self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
|
||||||
|
continue
|
||||||
|
asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
|
||||||
|
ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
|
||||||
|
if ext == 'm3u8':
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
|
||||||
|
formats.extend(fmts)
|
||||||
|
# Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
|
||||||
|
if not subtitles:
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
if is_live or not fmts:
|
||||||
|
continue
|
||||||
|
# Check for direct https mp4 format
|
||||||
|
best_video_fmt = traverse_obj(fmts, (
|
||||||
|
lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
|
||||||
|
{functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
|
||||||
|
base_url = self._search_regex(
|
||||||
|
r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
|
||||||
|
if not base_url or '/live/' in base_url:
|
||||||
|
continue
|
||||||
|
mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
|
||||||
|
if self._request_webpage(
|
||||||
|
HEADRequest(mp4_url), video_id, 'Checking for https format',
|
||||||
|
errnote=False, fatal=False):
|
||||||
|
formats.append({
|
||||||
|
**best_video_fmt,
|
||||||
|
'url': mp4_url,
|
||||||
|
'format_id': 'https-mp4',
|
||||||
|
'protocol': 'https',
|
||||||
|
'manifest_url': None,
|
||||||
|
'acodec': None,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'url': asset_data['url'],
|
||||||
|
'ext': ext,
|
||||||
|
'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
|
||||||
|
})
|
||||||
|
|
||||||
|
chapters = traverse_obj(data, (
|
||||||
|
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
|
||||||
|
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
'title': ('name', {str}),
|
||||||
|
}))
|
||||||
|
# Filter out pointless single chapters with start_time==0 and no end_time
|
||||||
|
if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
|
||||||
|
chapters = []
|
||||||
|
|
||||||
|
return {
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': ('description', {str.strip}),
|
||||||
|
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
|
||||||
|
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
'media_type': ('media', 'clipType', {str}),
|
||||||
|
'series': ('showName', {str}),
|
||||||
|
'season_number': ('media', 'season', {int_or_none}),
|
||||||
|
'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
|
||||||
|
'location': ('media', 'region', {str}),
|
||||||
|
'tags': ('tags', ..., 'name', {str}),
|
||||||
|
'genres': ('media', 'genre', all),
|
||||||
|
'categories': ('categories', ..., 'name', {str}),
|
||||||
|
}),
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'chapters': chapters,
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class CBCPlayerPlaylistIE(InfoExtractor):
|
class CBCPlayerPlaylistIE(InfoExtractor):
|
||||||
IE_NAME = 'cbc.ca:player:playlist'
|
IE_NAME = 'cbc.ca:player:playlist'
|
||||||
|
@ -455,10 +614,8 @@ def _get_claims_token_expiry(self):
|
||||||
|
|
||||||
def claims_token_expired(self):
|
def claims_token_expired(self):
|
||||||
exp = self._get_claims_token_expiry()
|
exp = self._get_claims_token_expiry()
|
||||||
if exp - time.time() < 10:
|
|
||||||
# It will expire in less than 10 seconds, or has already expired
|
# It will expire in less than 10 seconds, or has already expired
|
||||||
return True
|
return exp - time.time() < 10
|
||||||
return False
|
|
||||||
|
|
||||||
def claims_token_valid(self):
|
def claims_token_valid(self):
|
||||||
return self._claims_token is not None and not self.claims_token_expired()
|
return self._claims_token is not None and not self.claims_token_expired()
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import base64
|
import base64
|
||||||
import re
|
import re
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
|
|
|
@ -1,63 +1,50 @@
|
||||||
from .common import InfoExtractor
|
from .vidyard import VidyardBaseIE, VidyardIE
|
||||||
from ..utils import traverse_obj
|
from ..utils import ExtractorError, make_archive_id, url_basename
|
||||||
|
|
||||||
|
|
||||||
class CellebriteIE(InfoExtractor):
|
class CellebriteIE(VidyardBaseIE):
|
||||||
_VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
|
'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '16025876',
|
'id': 'ZqmUss3dQfEMGpauambPuH',
|
||||||
|
'display_id': '16025876',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
|
|
||||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
|
||||||
'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
|
'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
|
||||||
'duration': 455,
|
'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
|
||||||
'tags': [],
|
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||||
|
'duration': 455.979,
|
||||||
|
'_old_archive_ids': ['cellebrite 16025876'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
|
'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '29018255',
|
'id': 'QV1U8a2yzcxigw7VFnqKyg',
|
||||||
|
'display_id': '29018255',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 134,
|
'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
|
||||||
'tags': [],
|
'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
|
||||||
'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
|
|
||||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
|
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
|
||||||
'title': 'Android Extractions Explained',
|
'duration': 134.315,
|
||||||
|
'_old_archive_ids': ['cellebrite 29018255'],
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_formats_and_subtitles(self, json_data, display_id):
|
|
||||||
formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []]
|
|
||||||
subtitles = {}
|
|
||||||
|
|
||||||
for url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
|
|
||||||
fmt, sub = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
|
|
||||||
formats.extend(fmt)
|
|
||||||
self._merge_subtitles(sub, target=subtitles)
|
|
||||||
|
|
||||||
return formats, subtitles
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
slug = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, slug)
|
||||||
|
vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
|
||||||
|
if not vidyard_url:
|
||||||
|
raise ExtractorError('No Vidyard video embeds found on page')
|
||||||
|
|
||||||
player_uuid = self._search_regex(
|
video_id = url_basename(vidyard_url)
|
||||||
r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
|
info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
|
||||||
json_data = self._download_json(
|
if info.get('display_id'):
|
||||||
f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
|
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
|
||||||
|
if thumbnail := self._og_search_thumbnail(webpage, default=None):
|
||||||
|
info.setdefault('thumbnails', []).append({'url': thumbnail})
|
||||||
|
|
||||||
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
|
|
||||||
return {
|
return {
|
||||||
'id': str(json_data['videoId']),
|
'description': self._og_search_description(webpage, default=None),
|
||||||
'title': json_data.get('name') or self._og_search_title(webpage),
|
**info,
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'description': json_data.get('description') or self._og_search_description(webpage),
|
|
||||||
'duration': json_data.get('seconds'),
|
|
||||||
'tags': json_data.get('tags'),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'http_headers': {'Referer': 'https://play.vidyard.com/'},
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,7 +36,7 @@ class CHZZKLiveIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
live_detail = self._download_json(
|
live_detail = self._download_json(
|
||||||
f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
|
f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id,
|
||||||
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
note='Downloading channel info', errnote='Unable to download channel info')['content']
|
||||||
|
|
||||||
if live_detail.get('status') == 'CLOSE':
|
if live_detail.get('status') == 'CLOSE':
|
||||||
|
@ -106,12 +106,45 @@ class CHZZKVideoIE(InfoExtractor):
|
||||||
'upload_date': '20231219',
|
'upload_date': '20231219',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
|
'skip': 'Replay video is expired',
|
||||||
|
}, {
|
||||||
|
# Manually uploaded video
|
||||||
|
'url': 'https://chzzk.naver.com/video/1980',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1980',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '※시청주의※한번보면 잊기 힘든 영상',
|
||||||
|
'channel': '라디유radiyu',
|
||||||
|
'channel_id': '68f895c59a1043bc5019b5e08c83a5c5',
|
||||||
|
'channel_is_verified': False,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 95,
|
||||||
|
'timestamp': 1703102631.722,
|
||||||
|
'upload_date': '20231220',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Partner channel replay video
|
||||||
|
'url': 'https://chzzk.naver.com/video/2458',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2458',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '첫 방송',
|
||||||
|
'channel': '강지',
|
||||||
|
'channel_id': 'b5ed5db484d04faf4d150aedd362f34b',
|
||||||
|
'channel_is_verified': True,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 4433,
|
||||||
|
'timestamp': 1703307460.214,
|
||||||
|
'upload_date': '20231223',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_meta = self._download_json(
|
video_meta = self._download_json(
|
||||||
f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
|
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
|
||||||
note='Downloading video info', errnote='Unable to download video info')['content']
|
note='Downloading video info', errnote='Unable to download video info')['content']
|
||||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
formats, subtitles = self._extract_mpd_formats_and_subtitles(
|
||||||
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id,
|
||||||
|
|
|
@ -6,11 +6,11 @@
|
||||||
class CloudflareStreamIE(InfoExtractor):
|
class CloudflareStreamIE(InfoExtractor):
|
||||||
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
|
||||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||||
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
|
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||||
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
|
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||||
_EMBED_REGEX = [
|
_EMBED_REGEX = [
|
||||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
|
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||||
]
|
]
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -24,6 +24,14 @@ class CloudflareStreamIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||||
|
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
|
||||||
'duration': 1205,
|
'duration': 1205,
|
||||||
'upload_date': '20221130',
|
'upload_date': '20221130',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Video-only m3u8 formats need manual fixup
|
||||||
|
'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
|
||||||
|
'md5': 'fc472e40f6e6238446509be411c920e2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '08j_d24-6000-074',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20240620',
|
||||||
|
'duration': 1673,
|
||||||
|
'title': 'D24-6000-074-cetstud',
|
||||||
|
'timestamp': 1718902233,
|
||||||
|
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||||
|
},
|
||||||
|
'params': {'format': 'bv'},
|
||||||
}]
|
}]
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||||
|
@ -63,6 +79,9 @@ def _real_extract(self, url):
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
|
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
|
||||||
|
for fmt in fmts:
|
||||||
|
if re.search(r'chunklist_b\d+_vo_', fmt['url']):
|
||||||
|
fmt['acodec'] = 'none'
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
|
|
@ -2222,6 +2222,11 @@ def build_stream_name():
|
||||||
'quality': quality,
|
'quality': quality,
|
||||||
'has_drm': has_drm,
|
'has_drm': has_drm,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# YouTube-specific
|
||||||
|
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
|
||||||
|
f['language'] = yt_audio_content_id.split('.')[0]
|
||||||
|
|
||||||
resolution = last_stream_inf.get('RESOLUTION')
|
resolution = last_stream_inf.get('RESOLUTION')
|
||||||
if resolution:
|
if resolution:
|
||||||
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
|
||||||
|
@ -3145,7 +3150,7 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
||||||
})
|
})
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
|
||||||
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
|
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
|
||||||
def absolute_url(item_url):
|
def absolute_url(item_url):
|
||||||
return urljoin(base_url, item_url)
|
return urljoin(base_url, item_url)
|
||||||
|
|
||||||
|
@ -3169,11 +3174,11 @@ def _media_formats(src, cur_media_type, type_info=None):
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
full_url, video_id, ext='mp4',
|
full_url, video_id, ext='mp4',
|
||||||
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
|
||||||
preference=preference, quality=quality, fatal=False)
|
preference=preference, quality=quality, fatal=False, headers=_headers)
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
is_plain_url = False
|
is_plain_url = False
|
||||||
formats = self._extract_mpd_formats(
|
formats = self._extract_mpd_formats(
|
||||||
full_url, video_id, mpd_id=mpd_id, fatal=False)
|
full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
|
||||||
else:
|
else:
|
||||||
is_plain_url = True
|
is_plain_url = True
|
||||||
formats = [{
|
formats = [{
|
||||||
|
@ -3267,6 +3272,8 @@ def _media_formats(src, cur_media_type, type_info=None):
|
||||||
})
|
})
|
||||||
for f in media_info['formats']:
|
for f in media_info['formats']:
|
||||||
f.setdefault('http_headers', {})['Referer'] = base_url
|
f.setdefault('http_headers', {})['Referer'] = base_url
|
||||||
|
if _headers:
|
||||||
|
f['http_headers'].update(_headers)
|
||||||
if media_info['formats'] or media_info['subtitles']:
|
if media_info['formats'] or media_info['subtitles']:
|
||||||
entries.append(media_info)
|
entries.append(media_info)
|
||||||
return entries
|
return entries
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_protocol,
|
determine_protocol,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
@ -52,7 +53,7 @@ def _real_extract(self, url):
|
||||||
is_hls = container == 'M2TS'
|
is_hls = container == 'M2TS'
|
||||||
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
|
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''),
|
'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
|
||||||
'url': rendition_url,
|
'url': rendition_url,
|
||||||
'width': int_or_none(rendition.get('frameWidth')),
|
'width': int_or_none(rendition.get('frameWidth')),
|
||||||
'height': int_or_none(rendition.get('frameHeight')),
|
'height': int_or_none(rendition.get('frameHeight')),
|
||||||
|
|
|
@ -1,17 +1,20 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
parse_resolution,
|
parse_codecs,
|
||||||
traverse_obj,
|
|
||||||
try_get,
|
try_get,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class DigitalConcertHallIE(InfoExtractor):
|
class DigitalConcertHallIE(InfoExtractor):
|
||||||
IE_DESC = 'DigitalConcertHall extractor'
|
IE_DESC = 'DigitalConcertHall extractor'
|
||||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||||
|
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||||
_ACCESS_TOKEN = None
|
_ACCESS_TOKEN = None
|
||||||
_NETRC_MACHINE = 'digitalconcerthall'
|
_NETRC_MACHINE = 'digitalconcerthall'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -26,7 +29,8 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||||
'upload_date': '20210624',
|
'upload_date': '20210624',
|
||||||
'timestamp': 1624548600,
|
'timestamp': 1624548600,
|
||||||
'duration': 2798,
|
'duration': 2798,
|
||||||
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
|
'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
|
||||||
|
'composers': ['Kurt Weill'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
|
@ -34,8 +38,9 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||||
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
|
'url': 'https://www.digitalconcerthall.com/en/concert/53785',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '53785',
|
'id': '53785',
|
||||||
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
|
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||||
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||||
|
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
|
@ -49,39 +54,59 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||||
'upload_date': '20220714',
|
'upload_date': '20220714',
|
||||||
'timestamp': 1657785600,
|
'timestamp': 1657785600,
|
||||||
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff',
|
'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'Concert with several works and an interview',
|
||||||
|
'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '53785',
|
||||||
|
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
|
||||||
|
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
|
||||||
|
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'playlist_count': 1,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
token_response = self._download_json(
|
login_token = self._download_json(
|
||||||
self._OAUTH_URL,
|
self._OAUTH_URL,
|
||||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||||
'affiliate': 'none',
|
'affiliate': 'none',
|
||||||
'grant_type': 'device',
|
'grant_type': 'device',
|
||||||
'device_vendor': 'unknown',
|
'device_vendor': 'unknown',
|
||||||
|
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
|
||||||
|
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
|
||||||
'app_id': 'dch.webapp',
|
'app_id': 'dch.webapp',
|
||||||
'app_version': '1.0.0',
|
'app_distributor': 'berlinphil',
|
||||||
|
'app_version': '1.84.0',
|
||||||
'client_secret': '2ySLN+2Fwb',
|
'client_secret': '2ySLN+2Fwb',
|
||||||
}), headers={
|
}), headers={
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Accept': 'application/json',
|
||||||
})
|
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||||
self._ACCESS_TOKEN = token_response['access_token']
|
'User-Agent': self._USER_AGENT,
|
||||||
|
})['access_token']
|
||||||
try:
|
try:
|
||||||
self._download_json(
|
login_response = self._download_json(
|
||||||
self._OAUTH_URL,
|
self._OAUTH_URL,
|
||||||
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
||||||
'grant_type': 'password',
|
'grant_type': 'password',
|
||||||
'username': username,
|
'username': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
}), headers={
|
}), headers={
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Accept': 'application/json',
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||||
'Referer': 'https://www.digitalconcerthall.com',
|
'Referer': 'https://www.digitalconcerthall.com',
|
||||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
'Authorization': f'Bearer {login_token}',
|
||||||
|
'User-Agent': self._USER_AGENT,
|
||||||
})
|
})
|
||||||
except ExtractorError:
|
except ExtractorError as error:
|
||||||
self.raise_login_required(msg='Login info incorrect')
|
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||||
|
raise ExtractorError('Invalid username or password', expected=True)
|
||||||
|
raise
|
||||||
|
self._ACCESS_TOKEN = login_response['access_token']
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if not self._ACCESS_TOKEN:
|
if not self._ACCESS_TOKEN:
|
||||||
|
@ -95,17 +120,20 @@ def _entries(self, items, language, type_, **kwargs):
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||||
'Accept-Language': language,
|
'Accept-Language': language,
|
||||||
|
'User-Agent': self._USER_AGENT,
|
||||||
})
|
})
|
||||||
|
|
||||||
m3u8_url = traverse_obj(
|
formats = []
|
||||||
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
|
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
|
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
for fmt in formats:
|
||||||
|
if fmt.get('format_note') and fmt.get('vcodec') == 'none':
|
||||||
|
fmt.update(parse_codecs(fmt['format_note']))
|
||||||
|
|
||||||
yield {
|
yield {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': item.get('title'),
|
'title': item.get('title'),
|
||||||
'composer': item.get('name_composer'),
|
'composer': item.get('name_composer'),
|
||||||
'url': m3u8_url,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': item.get('duration_total'),
|
'duration': item.get('duration_total'),
|
||||||
'timestamp': traverse_obj(item, ('date', 'published')),
|
'timestamp': traverse_obj(item, ('date', 'published')),
|
||||||
|
@ -119,31 +147,34 @@ def _entries(self, items, language, type_, **kwargs):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id')
|
language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
|
||||||
if not language:
|
if not language:
|
||||||
language = 'en'
|
language = 'en'
|
||||||
|
|
||||||
thumbnail_url = self._html_search_regex(
|
api_type = 'concert' if type_ == 'work' else type_
|
||||||
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
|
|
||||||
self._download_webpage(url, video_id), 'thumbnail')
|
|
||||||
thumbnails = [{
|
|
||||||
'url': thumbnail_url,
|
|
||||||
**parse_resolution(thumbnail_url),
|
|
||||||
}]
|
|
||||||
|
|
||||||
vid_info = self._download_json(
|
vid_info = self._download_json(
|
||||||
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={
|
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
'Accept-Language': language,
|
'Accept-Language': language,
|
||||||
|
'User-Agent': self._USER_AGENT,
|
||||||
|
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||||
})
|
})
|
||||||
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')
|
|
||||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||||
|
|
||||||
|
if type_ == 'work':
|
||||||
|
videos = [videos[int(part) - 1]]
|
||||||
|
|
||||||
|
album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name', {str}))
|
||||||
|
thumbnail = traverse_obj(vid_info, (
|
||||||
|
'image', ..., {self._proto_relative_url}, {url_or_none},
|
||||||
|
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': vid_info.get('title'),
|
'title': vid_info.get('title'),
|
||||||
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_),
|
'entries': self._entries(
|
||||||
'thumbnails': thumbnails,
|
videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
|
||||||
'album_artist': album_artist,
|
'thumbnail': thumbnail,
|
||||||
|
'album_artists': album_artists,
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,115 +0,0 @@
|
||||||
import random
|
|
||||||
import string
|
|
||||||
import urllib.parse
|
|
||||||
|
|
||||||
from .discoverygo import DiscoveryGoBaseIE
|
|
||||||
from ..networking.exceptions import HTTPError
|
|
||||||
from ..utils import ExtractorError
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
|
||||||
_VALID_URL = r'''(?x)https?://
|
|
||||||
(?P<site>
|
|
||||||
go\.discovery|
|
|
||||||
www\.
|
|
||||||
(?:
|
|
||||||
investigationdiscovery|
|
|
||||||
discoverylife|
|
|
||||||
animalplanet|
|
|
||||||
ahctv|
|
|
||||||
destinationamerica|
|
|
||||||
sciencechannel|
|
|
||||||
tlc
|
|
||||||
)|
|
|
||||||
watch\.
|
|
||||||
(?:
|
|
||||||
hgtv|
|
|
||||||
foodnetwork|
|
|
||||||
travelchannel|
|
|
||||||
diynetwork|
|
|
||||||
cookingchanneltv|
|
|
||||||
motortrend
|
|
||||||
)
|
|
||||||
)\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5a2f35ce6b66d17a5026e29e',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Riding with Matthew Perry',
|
|
||||||
'description': 'md5:a34333153e79bc4526019a5129e7f878',
|
|
||||||
'duration': 84,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # requires ffmpeg
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
# using `show_slug` is important to get the correct video data
|
|
||||||
'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
_GEO_COUNTRIES = ['US']
|
|
||||||
_GEO_BYPASS = False
|
|
||||||
_API_BASE_URL = 'https://api.discovery.com/v1/'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
site, show_slug, display_id = self._match_valid_url(url).groups()
|
|
||||||
|
|
||||||
access_token = None
|
|
||||||
cookies = self._get_cookies(url)
|
|
||||||
|
|
||||||
# prefer Affiliate Auth Token over Anonymous Auth Token
|
|
||||||
auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
|
|
||||||
if auth_storage_cookie and auth_storage_cookie.value:
|
|
||||||
auth_storage = self._parse_json(urllib.parse.unquote(
|
|
||||||
urllib.parse.unquote(auth_storage_cookie.value)),
|
|
||||||
display_id, fatal=False) or {}
|
|
||||||
access_token = auth_storage.get('a') or auth_storage.get('access_token')
|
|
||||||
|
|
||||||
if not access_token:
|
|
||||||
access_token = self._download_json(
|
|
||||||
f'https://{site}.com/anonymous', display_id,
|
|
||||||
'Downloading token JSON metadata', query={
|
|
||||||
'authRel': 'authorization',
|
|
||||||
'client_id': '3020a40c2356a645b4b4',
|
|
||||||
'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
|
|
||||||
'redirectUri': 'https://www.discovery.com/',
|
|
||||||
})['access_token']
|
|
||||||
|
|
||||||
headers = self.geo_verification_headers()
|
|
||||||
headers['Authorization'] = 'Bearer ' + access_token
|
|
||||||
|
|
||||||
try:
|
|
||||||
video = self._download_json(
|
|
||||||
self._API_BASE_URL + 'content/videos',
|
|
||||||
display_id, 'Downloading content JSON metadata',
|
|
||||||
headers=headers, query={
|
|
||||||
'embed': 'show.name',
|
|
||||||
'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
|
|
||||||
'slug': display_id,
|
|
||||||
'show_slug': show_slug,
|
|
||||||
})[0]
|
|
||||||
video_id = video['id']
|
|
||||||
stream = self._download_json(
|
|
||||||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
|
||||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
|
||||||
e_description = self._parse_json(
|
|
||||||
e.cause.response.read().decode(), display_id)['description']
|
|
||||||
if 'resource not available for country' in e_description:
|
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
|
||||||
if 'Authorized Networks' in e_description:
|
|
||||||
raise ExtractorError(
|
|
||||||
'This video is only available via cable service provider subscription that'
|
|
||||||
' is not currently supported. You may want to use --cookies.', expected=True)
|
|
||||||
raise ExtractorError(e_description)
|
|
||||||
raise
|
|
||||||
|
|
||||||
return self._extract_video_info(video, stream, display_id)
|
|
|
@ -1,171 +0,0 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
determine_ext,
|
|
||||||
extract_attributes,
|
|
||||||
int_or_none,
|
|
||||||
parse_age_limit,
|
|
||||||
remove_end,
|
|
||||||
unescapeHTML,
|
|
||||||
url_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryGoBaseIE(InfoExtractor):
    """Shared URL template and info-dict construction for Discovery GO sites."""

    # Verbose-mode pattern; subclasses substitute ``%s`` with the path portion
    # that precedes the final slug, which is captured as ``id``.
    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/%s(?P<id>[^/?#&]+)'''

    def _extract_video_info(self, video, stream, display_id):
        """Convert a ``video``/``stream`` metadata pair into an info dict.

        Args:
            video: Mapping with the video metadata; ``name`` is required.
            stream: Mapping that may hold ``streamUrl`` (HLS), an HDS URL and
                ``captions``. A falsy value means no stream is available.
            display_id: Identifier used for messages and as the fallback id.

        Returns:
            The info dict (id, title, formats, subtitles and metadata).

        Raises:
            ExtractorError: If ``stream`` is falsy. The error is marked as
                expected when ``video['authenticated']`` is ``True``.
            KeyError: If ``video`` has no ``name``.
        """
        title = video['name']

        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            raise ExtractorError('Unable to find stream')

        def nested(key, subkey):
            # Tolerate keys that are present but null (or otherwise not a
            # mapping); a chained ``.get(key, {}).get(...)`` would crash there.
            value = video.get(key)
            return value.get(subkey) if isinstance(value, dict) else None

        formats = []

        hls_url = stream.get('streamUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        # The previous code built this key with ``'streamUrl'.capitalize()``,
        # which lowercases everything after the first character and therefore
        # produced 'hdsStreamurl'. Look up the camelCase key first and keep the
        # old spelling as a fallback so nothing that used to match is lost.
        hds_url = stream.get('hdsStreamUrl') or stream.get('hdsStreamurl')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url, display_id, f4m_id='hds', fatal=False))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                if not isinstance(caption, dict):
                    continue
                subtitle_url = url_or_none(caption.get('fileUrl'))
                if not subtitle_url or not subtitle_url.startswith('http'):
                    continue
                # ``or`` also covers an explicit null/empty language value.
                lang = caption.get('fileLang') or 'en'
                ext = determine_ext(subtitle_url)
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    'ext': 'ttml' if ext == 'xml' else ext,
                })

        return {
            'id': video.get('id') or display_id,
            'display_id': display_id,
            'title': title,
            'description': nested('description', 'detailed'),
            'duration': int_or_none(video.get('duration')),
            'series': nested('show', 'name'),
            'season_number': int_or_none(nested('season', 'number')),
            'episode_number': int_or_none(video.get('episodeNumber')),
            'tags': video.get('tags'),
            'age_limit': parse_age_limit(nested('parental', 'rating')),
            'formats': formats,
            'subtitles': subtitles,
        }
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryGoIE(DiscoveryGoBaseIE):
    # Single-episode pages: one or more path segments before the slug.
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
    _GEO_COUNTRIES = ['US']
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
        'info_dict': {
            'id': '58c167d86b66d12f2addeb01',
            'ext': 'mp4',
            'title': 'Reaper Madness',
            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
            'duration': 2519,
            'series': 'Bering Sea Gold',
            'season_number': 8,
            'episode_number': 6,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Extract one video from the JSON embedded in the player container.

        Raises:
            ExtractorError: If the player container is missing, or carries
                neither a ``data-video`` nor a ``data-json`` attribute.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        video_json = container.get('data-video') or container.get('data-json')
        if not video_json:
            # Without this guard ``None`` would be handed to the JSON parser,
            # producing an opaque type error instead of an extractor error.
            raise ExtractorError('Unable to find video data in player container')

        video = self._parse_json(video_json, display_id)

        stream = video.get('stream')

        return self._extract_video_info(video, stream, display_id)
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    # Show pages: the slug follows the host directly, with no extra path.
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        # Episode URLs are claimed by DiscoveryGoIE; never handle them here.
        if DiscoveryGoIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Collect every episode referenced by ``data-json`` attributes on a show page."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        episodes = []
        for match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', page):
            item = self._parse_json(
                match.group('json'), playlist_id,
                transform_source=unescapeHTML, fatal=False)
            # Keep only well-formed episode records that carry a link.
            is_episode = isinstance(item, dict) and item.get('type') == 'episode'
            link = item.get('socialUrl') if is_episode else None
            if link:
                episodes.append(self.url_result(
                    link, ie=DiscoveryGoIE.ie_key(),
                    video_id=item.get('id')))

        playlist_title = remove_end(
            self._og_search_title(page, fatal=False), ' | Discovery GO')
        playlist_description = self._og_search_description(page)

        return self.playlist_result(
            episodes, playlist_id, playlist_title, playlist_description)
|
|
|
@ -24,8 +24,9 @@
|
||||||
class DouyuBaseIE(InfoExtractor):
|
class DouyuBaseIE(InfoExtractor):
|
||||||
def _download_cryptojs_md5(self, video_id):
|
def _download_cryptojs_md5(self, video_id):
|
||||||
for url in [
|
for url in [
|
||||||
|
# XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
|
||||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
|
||||||
]:
|
]:
|
||||||
js_code = self._download_webpage(
|
js_code = self._download_webpage(
|
||||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||||
|
@ -35,7 +36,8 @@ def _download_cryptojs_md5(self, video_id):
|
||||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||||
|
|
||||||
def _get_cryptojs_md5(self, video_id):
|
def _get_cryptojs_md5(self, video_id):
|
||||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
return self.cache.load(
|
||||||
|
'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id)
|
||||||
|
|
||||||
def _calc_sign(self, sign_func, video_id, a):
|
def _calc_sign(self, sign_func, video_id, a):
|
||||||
b = uuid.uuid4().hex
|
b = uuid.uuid4().hex
|
||||||
|
|
|
@ -346,8 +346,16 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
class DiscoveryPlusBaseIE(DPlayBaseIE):
|
||||||
|
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
|
||||||
|
|
||||||
|
_DISCO_CLIENT_VER = '27.43.0'
|
||||||
|
|
||||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
|
headers.update({
|
||||||
|
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||||
|
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:{self._DISCO_CLIENT_VER}',
|
||||||
|
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||||
|
})
|
||||||
|
|
||||||
def _download_video_playback_info(self, disco_base, video_id, headers):
|
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
|
@ -368,6 +376,26 @@ def _real_extract(self, url):
|
||||||
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://go.discovery.com/video/in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5352642',
|
||||||
|
'display_id': 'in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Trapped in a Twister',
|
||||||
|
'description': 'Twisters destroy Midwest towns, trapping spotters in the eye of the storm.',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'series': 'In The Eye Of The Storm',
|
||||||
|
'duration': 2490.237,
|
||||||
|
'upload_date': '20240715',
|
||||||
|
'timestamp': 1721008800,
|
||||||
|
'tags': [],
|
||||||
|
'creators': ['Discovery'],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/10/5e39637d-cabf-3ab3-8e9a-f4e9d37bc036.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
|
'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4164906',
|
'id': '4164906',
|
||||||
|
@ -395,6 +423,26 @@ class GoDiscoveryIE(DiscoveryPlusBaseIE):
|
||||||
class TravelChannelIE(DiscoveryPlusBaseIE):
|
class TravelChannelIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.travelchannel.com/video/the-dead-files-travel-channel/protect-the-children',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4710177',
|
||||||
|
'display_id': 'the-dead-files-travel-channel/protect-the-children',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Protect the Children',
|
||||||
|
'description': 'An evil presence threatens an Ohio woman\'s children and marriage.',
|
||||||
|
'season_number': 14,
|
||||||
|
'season': 'Season 14',
|
||||||
|
'episode_number': 10,
|
||||||
|
'episode': 'Episode 10',
|
||||||
|
'series': 'The Dead Files',
|
||||||
|
'duration': 2550.481,
|
||||||
|
'timestamp': 1664510400,
|
||||||
|
'upload_date': '20220930',
|
||||||
|
'tags': [],
|
||||||
|
'creators': ['Travel Channel'],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/17/5e45eace-de5d-343a-9293-f400a2aa77d5.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
|
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2220256',
|
'id': '2220256',
|
||||||
|
@ -422,6 +470,26 @@ class TravelChannelIE(DiscoveryPlusBaseIE):
|
||||||
class CookingChannelIE(DiscoveryPlusBaseIE):
|
class CookingChannelIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.cookingchanneltv.com/video/bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5350005',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': 'bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
|
||||||
|
'title': 'Titans vs Marcus Samuelsson',
|
||||||
|
'description': 'Marcus Samuelsson throws his legendary global tricks at the Titans.',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'season_number': 3,
|
||||||
|
'season': 'Season 3',
|
||||||
|
'series': 'Bobby\'s Triple Threat',
|
||||||
|
'duration': 2520.851,
|
||||||
|
'upload_date': '20240710',
|
||||||
|
'timestamp': 1720573200,
|
||||||
|
'tags': [],
|
||||||
|
'creators': ['Food Network'],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/04/529cd095-27ec-35c5-84e9-90ebd3e5d2da.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
|
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2348634',
|
'id': '2348634',
|
||||||
|
@ -449,6 +517,22 @@ class CookingChannelIE(DiscoveryPlusBaseIE):
|
||||||
class HGTVUsaIE(DiscoveryPlusBaseIE):
|
class HGTVUsaIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.hgtv.com/video/flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5025585',
|
||||||
|
'display_id': 'flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Flip or Flop: The Final Flip',
|
||||||
|
'description': 'Tarek and Christina are going their separate ways after one last flip!',
|
||||||
|
'series': 'Flip or Flop: The Final Flip',
|
||||||
|
'duration': 2580.644,
|
||||||
|
'upload_date': '20231101',
|
||||||
|
'timestamp': 1698811200,
|
||||||
|
'tags': [],
|
||||||
|
'creators': ['HGTV'],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/11/27/455caa6c-1462-3f14-b63d-a026d7a5e6d3.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
|
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4289736',
|
'id': '4289736',
|
||||||
|
@ -476,6 +560,26 @@ class HGTVUsaIE(DiscoveryPlusBaseIE):
|
||||||
class FoodNetworkIE(DiscoveryPlusBaseIE):
|
class FoodNetworkIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.foodnetwork.com/video/guys-grocery-games-food-network/wild-in-the-aisles',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2152549',
|
||||||
|
'display_id': 'guys-grocery-games-food-network/wild-in-the-aisles',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Wild in the Aisles',
|
||||||
|
'description': 'The chefs make spaghetti and meatballs with "Out of Stock" ingredients.',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'series': 'Guy\'s Grocery Games',
|
||||||
|
'tags': [],
|
||||||
|
'creators': ['Food Network'],
|
||||||
|
'duration': 2520.651,
|
||||||
|
'upload_date': '20230623',
|
||||||
|
'timestamp': 1687492800,
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/06/15/37fb5333-cad2-3dbb-af7c-c20ec77c89c6.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
|
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4116449',
|
'id': '4116449',
|
||||||
|
@ -503,6 +607,26 @@ class FoodNetworkIE(DiscoveryPlusBaseIE):
|
||||||
class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.destinationamerica.com/video/bbq-pit-wars-destination-america/smoke-on-the-water',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2218409',
|
||||||
|
'display_id': 'bbq-pit-wars-destination-america/smoke-on-the-water',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Smoke on the Water',
|
||||||
|
'description': 'The pitmasters head to Georgia for the Smoke on the Water BBQ Festival.',
|
||||||
|
'season_number': 2,
|
||||||
|
'season': 'Season 2',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'series': 'BBQ Pit Wars',
|
||||||
|
'tags': [],
|
||||||
|
'creators': ['Destination America'],
|
||||||
|
'duration': 2614.878,
|
||||||
|
'upload_date': '20230623',
|
||||||
|
'timestamp': 1687492800,
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/c0f8e85d-9a10-3e6f-8e43-f6faafa81ba2.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
|
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4210904',
|
'id': '4210904',
|
||||||
|
@ -530,6 +654,26 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE):
|
||||||
class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.investigationdiscovery.com/video/deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5341132',
|
||||||
|
'display_id': 'deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'RIP Bianca',
|
||||||
|
'description': 'A teenage influencer discovers an online world of threat, harm and danger.',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'episode_number': 3,
|
||||||
|
'episode': 'Episode 3',
|
||||||
|
'series': 'Deadly Influence: The Social Media Murders',
|
||||||
|
'creators': ['Investigation Discovery'],
|
||||||
|
'tags': [],
|
||||||
|
'duration': 2490.888,
|
||||||
|
'upload_date': '20240618',
|
||||||
|
'timestamp': 1718672400,
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2024/06/15/b567c774-9e44-3c6c-b0ba-db860a73e812.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
|
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2139409',
|
'id': '2139409',
|
||||||
|
@ -557,6 +701,26 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
|
||||||
class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.ahctv.com/video/blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2139199',
|
||||||
|
'display_id': 'blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Battle of Bull Run',
|
||||||
|
'description': 'Two untested armies clash in the first real battle of the Civil War.',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'series': 'Blood and Fury: America\'s Civil War',
|
||||||
|
'duration': 2612.509,
|
||||||
|
'upload_date': '20220923',
|
||||||
|
'timestamp': 1663905600,
|
||||||
|
'creators': ['AHC'],
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/4af61bd7-d705-3108-82c4-1a6e541e20fa.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
|
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2309730',
|
'id': '2309730',
|
||||||
|
@ -584,6 +748,26 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE):
|
||||||
class ScienceChannelIE(DiscoveryPlusBaseIE):
|
class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.sciencechannel.com/video/spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2347335',
|
||||||
|
'display_id': 'spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mystery of the Dead Planets',
|
||||||
|
'description': 'Astronomers unmask the truly destructive nature of the cosmos.',
|
||||||
|
'season_number': 7,
|
||||||
|
'season': 'Season 7',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'series': 'Space\'s Deepest Secrets',
|
||||||
|
'duration': 2524.989,
|
||||||
|
'upload_date': '20230128',
|
||||||
|
'timestamp': 1674882000,
|
||||||
|
'creators': ['Science'],
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/30/3796829d-aead-3f9a-bd8d-e49048b3cdca.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
|
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2842849',
|
'id': '2842849',
|
||||||
|
@ -608,36 +792,29 @@ class ScienceChannelIE(DiscoveryPlusBaseIE):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class DIYNetworkIE(DiscoveryPlusBaseIE):
    # Product key and API parameters consumed by the Discovery+ base class.
    _PRODUCT = 'diy'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.diynetwork.com',
        'realm': 'go',
        'country': 'us',
    }

    _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [
        {
            'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
            'info_dict': {
                'id': '2309730',
                'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
                'ext': 'mp4',
                'title': 'Bringing Beach Life to Texas',
                'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
                'season_number': 10,
                'episode_number': 2,
            },
            'skip': 'Available for Premium users',
        },
        {
            'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
            'only_matching': True,
        },
    ]
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.discoverylife.com/video/er-files-discovery-life-atve-us/sweet-charity',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2347614',
|
||||||
|
'display_id': 'er-files-discovery-life-atve-us/sweet-charity',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sweet Charity',
|
||||||
|
'description': 'The staff at Charity Hospital treat a serious foot infection.',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'series': 'ER Files',
|
||||||
|
'duration': 2364.261,
|
||||||
|
'upload_date': '20230721',
|
||||||
|
'timestamp': 1689912000,
|
||||||
|
'creators': ['Discovery Life'],
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/16/4b6f0124-360b-3546-b6a4-5552db886b86.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
|
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2218238',
|
'id': '2218238',
|
||||||
|
@ -665,6 +842,26 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE):
|
||||||
class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.animalplanet.com/video/mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4650835',
|
||||||
|
'display_id': 'mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Demon of Peru',
|
||||||
|
'description': 'In Peru, a farming village is being terrorized by a “man-like beast.”',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'episode_number': 4,
|
||||||
|
'episode': 'Episode 4',
|
||||||
|
'series': 'Mysterious Creatures with Forrest Galante',
|
||||||
|
'duration': 2490.488,
|
||||||
|
'upload_date': '20230111',
|
||||||
|
'timestamp': 1673413200,
|
||||||
|
'creators': ['Animal Planet'],
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/01/6dbaa833-9a2e-3fee-9381-c19eddf67c0c.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
|
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3338923',
|
'id': '3338923',
|
||||||
|
@ -692,6 +889,26 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
|
||||||
class TLCIE(DiscoveryPlusBaseIE):
|
class TLCIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://go.tlc.com/video/90-day-the-last-resort-tlc-atve-us/the-last-chance',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5186422',
|
||||||
|
'display_id': '90-day-the-last-resort-tlc-atve-us/the-last-chance',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Last Chance',
|
||||||
|
'description': 'Infidelity shakes Kalani and Asuelu\'s world, and Angela threatens divorce.',
|
||||||
|
'season_number': 1,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
'series': '90 Day: The Last Resort',
|
||||||
|
'duration': 5123.91,
|
||||||
|
'upload_date': '20230815',
|
||||||
|
'timestamp': 1692061200,
|
||||||
|
'creators': ['TLC'],
|
||||||
|
'tags': [],
|
||||||
|
'thumbnail': 'https://us1-prod-images.disco-api.com/2023/08/08/0ee367e2-ac76-334d-bf23-dbf796696a24.jpeg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
|
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2206540',
|
'id': '2206540',
|
||||||
|
@ -716,93 +933,8 @@ class TLCIE(DiscoveryPlusBaseIE):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class MotorTrendIE(DiscoveryPlusBaseIE):
    _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'info_dict': {
            # Plain numeric id; the previous value carried stray embedded
            # double quotes ('"4859182"'), unlike every other id in this file.
            'id': '4859182',
            'display_id': 'double-dakotas',
            'ext': 'mp4',
            'title': 'Double Dakotas',
            'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
            'season_number': 2,
            'episode_number': 3,
        },
        'skip': 'Available for Premium users',
    }, {
        'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
        'only_matching': True,
    }]

    # Product key and API parameters consumed by the Discovery+ base class.
    _PRODUCT = 'vel'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.watch.motortrend.com',
        'realm': 'go',
        'country': 'us',
    }
|
|
||||||
|
|
||||||
|
|
||||||
class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
    # Product key and API parameters consumed by the Discovery+ base class.
    _PRODUCT = 'MTOD'
    _DISCO_API_PARAMS = {
        'disco_host': 'us1-prod-direct.motortrendondemand.com',
        'realm': 'motortrend',
        'country': 'us',
    }

    _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
    _TESTS = [
        {
            'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
            'info_dict': {
                'id': '37699',
                'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
                'ext': 'mp4',
                'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
                'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
                'season_number': 5,
                'episode_number': 52,
                'episode': 'Episode 52',
                'season': 'Season 5',
                'thumbnail': r're:^https?://.+\.jpe?g$',
                'timestamp': 1388534401,
                'duration': 1887.345,
                'creator': 'Originals',
                'series': 'Roadkill',
                'upload_date': '20140101',
                'tags': [],
            },
        },
        {
            'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
            'info_dict': {
                'id': '4922860',
                'ext': 'mp4',
                'title': 'Roadworthy Rescues | Teaser Trailer',
                'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
                'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
                'creator': 'Originals',
                'series': 'Roadworthy Rescues',
                'thumbnail': r're:^https?://.+\.jpe?g$',
                'upload_date': '20220907',
                'timestamp': 1662523200,
                'duration': 1066.356,
                'tags': [],
            },
        },
        {
            'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
            'only_matching': True,
        },
    ]

    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        """Add the realm, client and authorization headers to ``headers`` in place."""
        disco_params = f'realm={realm}'
        disco_client = f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1'
        authorization = self._get_auth(disco_base, display_id, realm)
        # Apply all three together so ``headers`` stays untouched if auth fails.
        headers.update({
            'x-disco-params': disco_params,
            'x-disco-client': disco_client,
            'Authorization': authorization,
        })
|
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
|
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -823,14 +955,45 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
|
'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PRODUCT = 'dplus_us'
|
_PRODUCT = None
|
||||||
_DISCO_API_PARAMS = {
|
_DISCO_API_PARAMS = None
|
||||||
|
|
||||||
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
|
headers.update({
|
||||||
|
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||||
|
'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
|
||||||
|
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||||
|
})
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, country = self._match_valid_url(url).group('id', 'country')
|
||||||
|
if not country:
|
||||||
|
country = 'us'
|
||||||
|
|
||||||
|
self._PRODUCT = f'dplus_{country}'
|
||||||
|
|
||||||
|
if country in ('br', 'ca', 'us'):
|
||||||
|
self._DISCO_API_PARAMS = {
|
||||||
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
'disco_host': 'us1-prod-direct.discoveryplus.com',
|
||||||
'realm': 'go',
|
'realm': 'go',
|
||||||
'country': 'us',
|
'country': country,
|
||||||
}
|
}
|
||||||
|
else:
|
||||||
|
self._DISCO_API_PARAMS = {
|
||||||
|
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
||||||
|
'realm': 'dplay',
|
||||||
|
'country': country,
|
||||||
|
}
|
||||||
|
|
||||||
|
return self._get_disco_api_info(url, video_id, **self._DISCO_API_PARAMS)
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
|
class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
|
||||||
|
@ -993,7 +1156,7 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PRODUCT = 'dplus_us'
|
_PRODUCT = 'dplus_it'
|
||||||
_DISCO_API_PARAMS = {
|
_DISCO_API_PARAMS = {
|
||||||
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
'disco_host': 'eu1-prod-direct.discoveryplus.com',
|
||||||
'realm': 'dplay',
|
'realm': 'dplay',
|
||||||
|
@ -1002,8 +1165,8 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
|
||||||
|
|
||||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
headers.update({
|
headers.update({
|
||||||
'x-disco-params': f'realm={realm}',
|
'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
|
||||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
|
'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
|
||||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -1044,39 +1207,3 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
|
||||||
_SHOW_STR = 'show'
|
_SHOW_STR = 'show'
|
||||||
_INDEX = 4
|
_INDEX = 4
|
||||||
_VIDEO_IE = DiscoveryPlusIndiaIE
|
_VIDEO_IE = DiscoveryPlusIndiaIE
|
||||||
|
|
||||||
|
|
||||||
class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
|
|
||||||
_VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1397691',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'The Athertons: Mountain Biking\'s Fastest Family',
|
|
||||||
'description': 'md5:75a81937fcd8b989eec6083a709cd837',
|
|
||||||
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
|
|
||||||
'series': 'gcn',
|
|
||||||
'creator': 'Gcn',
|
|
||||||
'upload_date': '20210309',
|
|
||||||
'timestamp': 1615248000,
|
|
||||||
'duration': 2531.0,
|
|
||||||
'tags': [],
|
|
||||||
},
|
|
||||||
'skip': 'Subscription required',
|
|
||||||
'params': {'skip_download': 'm3u8'},
|
|
||||||
}]
|
|
||||||
|
|
||||||
_PRODUCT = 'web'
|
|
||||||
_DISCO_API_PARAMS = {
|
|
||||||
'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
|
|
||||||
'realm': 'gcn',
|
|
||||||
'country': 'us',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
|
||||||
headers.update({
|
|
||||||
'x-disco-params': f'realm={realm}',
|
|
||||||
'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
|
|
||||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
|
||||||
})
|
|
||||||
|
|
|
@ -1,6 +1,11 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking import Request
|
from ..networking import Request
|
||||||
from ..utils import float_or_none, int_or_none, parse_iso8601
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class EitbIE(InfoExtractor):
|
class EitbIE(InfoExtractor):
|
||||||
|
@ -37,12 +42,9 @@ def _real_extract(self, url):
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
|
||||||
format_id = 'http'
|
|
||||||
if tbr:
|
|
||||||
format_id += f'-{int(tbr)}'
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': rendition['PMD_URL'],
|
'url': rendition['PMD_URL'],
|
||||||
'format_id': format_id,
|
'format_id': join_nonempty('http', int_or_none(tbr)),
|
||||||
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
'width': int_or_none(rendition.get('FRAME_WIDTH')),
|
||||||
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
'height': int_or_none(rendition.get('FRAME_HEIGHT')),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
|
@ -13,7 +14,7 @@
|
||||||
|
|
||||||
|
|
||||||
class EpidemicSoundIE(InfoExtractor):
|
class EpidemicSoundIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/(?:(?P<sfx>sound-effects/tracks)|track)/(?P<id>[0-9a-zA-Z-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
|
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
|
||||||
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
|
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
|
||||||
|
@ -47,6 +48,20 @@ class EpidemicSoundIE(InfoExtractor):
|
||||||
'release_timestamp': 1700535606,
|
'release_timestamp': 1700535606,
|
||||||
'release_date': '20231121',
|
'release_date': '20231121',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.epidemicsound.com/sound-effects/tracks/2f02f54b-9faa-4daf-abac-1cfe9e9cef69/',
|
||||||
|
'md5': '35d7cf05bd8b614a84f0495a05de9388',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '208931',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'upload_date': '20240603',
|
||||||
|
'timestamp': 1717436529,
|
||||||
|
'categories': ['appliance'],
|
||||||
|
'display_id': '6b2NXLURPr',
|
||||||
|
'duration': 1.0,
|
||||||
|
'title': 'Oven, Grill, Door Open 01',
|
||||||
|
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -77,8 +92,10 @@ def _epidemic_fmt_or_none(f):
|
||||||
return f
|
return f
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id, is_sfx = self._match_valid_url(url).group('id', 'sfx')
|
||||||
json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)
|
json_data = self._download_json(join_nonempty(
|
||||||
|
'https://www.epidemicsound.com/json/track',
|
||||||
|
is_sfx and 'kosmos-id', video_id, delim='/'), video_id)
|
||||||
|
|
||||||
thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
|
thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
|
||||||
thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
|
thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
|
||||||
|
|
|
@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'proxy': '127.0.0.1:8118',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# New (May 2016) URL layout
|
# New (May 2016) URL layout
|
||||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||||
|
|
|
@ -571,16 +571,21 @@ def process_formats(info):
|
||||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||||
|
|
||||||
def extract_relay_data(_filter):
|
def yield_all_relay_data(_filter):
|
||||||
return self._parse_json(self._search_regex(
|
for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
|
||||||
rf'data-sjs>({{.*?{_filter}.*?}})</script>',
|
yield self._parse_json(relay_data, video_id, fatal=False) or {}
|
||||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
|
||||||
|
|
||||||
def extract_relay_prefetched_data(_filter):
|
def extract_relay_data(_filter):
|
||||||
return traverse_obj(extract_relay_data(_filter), (
|
return next(filter(None, yield_all_relay_data(_filter)), {})
|
||||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
|
||||||
|
def extract_relay_prefetched_data(_filter, target_keys=None):
|
||||||
|
path = 'data'
|
||||||
|
if target_keys is not None:
|
||||||
|
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
|
||||||
|
return traverse_obj(yield_all_relay_data(_filter), (
|
||||||
|
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(self._search_regex([
|
server_js_data = self._parse_json(self._search_regex([
|
||||||
|
@ -591,7 +596,8 @@ def extract_relay_prefetched_data(_filter):
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
data = extract_relay_prefetched_data(
|
data = extract_relay_prefetched_data(
|
||||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
|
||||||
|
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
|
||||||
if data:
|
if data:
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
@ -621,6 +627,9 @@ def parse_graphql_video(video):
|
||||||
'url': playable_url,
|
'url': playable_url,
|
||||||
})
|
})
|
||||||
extract_dash_manifest(video, formats)
|
extract_dash_manifest(video, formats)
|
||||||
|
if not formats:
|
||||||
|
# Do not append false positive entry w/o any formats
|
||||||
|
return
|
||||||
|
|
||||||
automatic_captions, subtitles = {}, {}
|
automatic_captions, subtitles = {}, {}
|
||||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
|
from ..utils._utils import _UnsafeExtensionError
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -2446,9 +2447,13 @@ def _real_extract(self, url):
|
||||||
if not is_html(first_bytes):
|
if not is_html(first_bytes):
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'URL could be a direct video link, returning it as such.')
|
'URL could be a direct video link, returning it as such.')
|
||||||
|
ext = determine_ext(url)
|
||||||
|
if ext not in _UnsafeExtensionError.ALLOWED_EXTENSIONS:
|
||||||
|
ext = 'unknown_video'
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'direct': True,
|
'direct': True,
|
||||||
'url': url,
|
'url': url,
|
||||||
|
'ext': ext,
|
||||||
})
|
})
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_start,
|
remove_start,
|
||||||
|
@ -287,7 +288,7 @@ def _real_extract(self, url):
|
||||||
if mobj:
|
if mobj:
|
||||||
height = int(mobj.group(2))
|
height = int(mobj.group(2))
|
||||||
f.update({
|
f.update({
|
||||||
'format_id': (f'{format_id}-' if format_id else '') + f'{height}P',
|
'format_id': join_nonempty(format_id, f'{height}P'),
|
||||||
'width': int(mobj.group(1)),
|
'width': int(mobj.group(1)),
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
|
|
32
yt_dlp/extractor/graspop.py
Normal file
32
yt_dlp/extractor/graspop.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import update_url, url_or_none
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class GraspopIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '101556',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Thy Art Is Murder',
|
||||||
|
'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
metadata = self._download_json(
|
||||||
|
f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': self._extract_m3u8_formats(
|
||||||
|
# Downgrade manifest request to avoid incomplete certificate chain error
|
||||||
|
update_url(metadata['source']['assetUri'], scheme='http'), video_id, 'mp4'),
|
||||||
|
**traverse_obj(metadata, {
|
||||||
|
'title': ('name', {str}),
|
||||||
|
'thumbnail': ('source', 'poster', {url_or_none}),
|
||||||
|
}),
|
||||||
|
}
|
|
@ -3,6 +3,7 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
urljoin,
|
urljoin,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
|
@ -69,7 +70,7 @@ def _extract_info(self, url, display_id):
|
||||||
height = format_info.get('height')
|
height = format_info.get('height')
|
||||||
fmt = {
|
fmt = {
|
||||||
'url': path,
|
'url': path,
|
||||||
'format_id': 'http{}'.format(f'-{height}p' if height else ''),
|
'format_id': join_nonempty('http', height and f'{height}p'),
|
||||||
'width': format_info.get('width'),
|
'width': format_info.get('width'),
|
||||||
'height': height,
|
'height': height,
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
|
||||||
'duration': 907,
|
'duration': 907,
|
||||||
'subtitles': {},
|
'subtitles': {},
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'geo_verification_proxy': '<HK proxy here>',
|
|
||||||
},
|
|
||||||
'skip': 'Geo restricted to HK',
|
'skip': 'Geo restricted to HK',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
|
@ -453,7 +453,7 @@ def _real_extract(self, url):
|
||||||
else:
|
else:
|
||||||
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
|
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False)
|
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
|
||||||
additional_data = self._search_json(
|
additional_data = self._search_json(
|
||||||
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
|
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
|
||||||
if not additional_data and not media:
|
if not additional_data and not media:
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
|
@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE):
|
||||||
'title': 'naagin',
|
'title': 'naagin',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 120,
|
'playlist_mincount': 120,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3499820',
|
||||||
|
'title': 'mtv-splitsvilla-x5',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 310,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _entries(self, series_id):
|
def _entries(self, series_id):
|
||||||
seasons = self._download_json(
|
seasons = traverse_obj(self._download_json(
|
||||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id,
|
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
|
||||||
'Downloading series metadata JSON', query={
|
'Downloading series metadata JSON', query={'responseType': 'common'}), (
|
||||||
'sort': 'season:asc',
|
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
|
||||||
'id': series_id,
|
'trayTabs', lambda _, v: v['id']))
|
||||||
'responseType': 'common',
|
|
||||||
})
|
|
||||||
|
|
||||||
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1):
|
for season_num, season in enumerate(seasons, start=1):
|
||||||
season_id = season['id']
|
season_id = season['id']
|
||||||
label = season.get('season') or season_num
|
label = season.get('label') or season_num
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
episodes = traverse_obj(self._download_json(
|
episodes = traverse_obj(self._download_json(
|
||||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
||||||
|
|
|
@ -158,7 +158,7 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||||
IE_NAME = 'jiosaavn:playlist'
|
IE_NAME = 'jiosaavn:playlist'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -173,6 +173,13 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||||
'title': 'Mood Hindi',
|
'title': 'Mood Hindi',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 801,
|
'playlist_mincount': 801,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Me5RridRfDk_',
|
||||||
|
'title': 'Taaza Tunes',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 301,
|
||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 50
|
_PAGE_SIZE = 50
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,14 @@
|
||||||
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
|
parse_iso8601,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -25,104 +30,192 @@ def _real_initialize(self):
|
||||||
|
|
||||||
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
f'https://kick.com/api/v1/{path}', display_id, note=note,
|
f'https://kick.com/api/{path}', display_id, note=note,
|
||||||
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class KickIE(KickBaseIE):
|
class KickIE(KickBaseIE):
|
||||||
|
IE_NAME = 'kick:live'
|
||||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://kick.com/yuppy',
|
'url': 'https://kick.com/buddha',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
|
'id': '92722911-nopixel-40',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': str,
|
'title': str,
|
||||||
'description': str,
|
'description': str,
|
||||||
'channel': 'yuppy',
|
|
||||||
'channel_id': '33538',
|
|
||||||
'uploader': 'Yuppy',
|
|
||||||
'uploader_id': '33793',
|
|
||||||
'upload_date': str,
|
|
||||||
'live_status': 'is_live',
|
|
||||||
'timestamp': int,
|
'timestamp': int,
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:https?://.+\.jpg',
|
||||||
'categories': list,
|
'categories': list,
|
||||||
|
'upload_date': str,
|
||||||
|
'channel': 'buddha',
|
||||||
|
'channel_id': '32807',
|
||||||
|
'uploader': 'Buddha',
|
||||||
|
'uploader_id': '33057',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'concurrent_view_count': int,
|
||||||
|
'release_timestamp': int,
|
||||||
|
'age_limit': 18,
|
||||||
|
'release_date': str,
|
||||||
},
|
},
|
||||||
'skip': 'livestream',
|
'params': {'skip_download': 'livestream'},
|
||||||
|
# 'skip': 'livestream',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://kick.com/kmack710',
|
'url': 'https://kick.com/xqc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if KickClipIE.suitable(url) else super().suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel = self._match_id(url)
|
channel = self._match_id(url)
|
||||||
response = self._call_api(f'channels/{channel}', channel)
|
response = self._call_api(f'v2/channels/{channel}', channel)
|
||||||
if not traverse_obj(response, 'livestream', expected_type=dict):
|
if not traverse_obj(response, 'livestream', expected_type=dict):
|
||||||
raise UserNotLive(video_id=channel)
|
raise UserNotLive(video_id=channel)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': str(traverse_obj(
|
|
||||||
response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
|
|
||||||
'formats': self._extract_m3u8_formats(
|
|
||||||
response['playback_url'], channel, 'mp4', live=True),
|
|
||||||
'title': traverse_obj(
|
|
||||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
|
||||||
'description': traverse_obj(response, ('user', 'bio')),
|
|
||||||
'channel': channel,
|
'channel': channel,
|
||||||
'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
|
|
||||||
'uploader': traverse_obj(response, 'name', ('user', 'username')),
|
|
||||||
'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
|
'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
|
||||||
'thumbnail': traverse_obj(
|
**traverse_obj(response, {
|
||||||
response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
|
'id': ('livestream', 'slug', {str}),
|
||||||
'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
|
'title': ('livestream', 'session_title', {str}),
|
||||||
|
'description': ('user', 'bio', {str}),
|
||||||
|
'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
|
||||||
|
'uploader': (('name', ('user', 'username')), {str}, any),
|
||||||
|
'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
|
||||||
|
'timestamp': ('livestream', 'created_at', {unified_timestamp}),
|
||||||
|
'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
|
||||||
|
'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
|
||||||
|
'categories': ('recent_categories', ..., 'name', {str}),
|
||||||
|
'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
|
||||||
|
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class KickVODIE(KickBaseIE):
|
class KickVODIE(KickBaseIE):
|
||||||
|
IE_NAME = 'kick:vod'
|
||||||
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
|
'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
|
||||||
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
'md5': '3870f94153e40e7121a6e46c068b70cb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
|
'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
|
'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+',
|
||||||
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
|
'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
|
||||||
'channel': 'jaredfps',
|
'channel': 'xqc',
|
||||||
'channel_id': '26608',
|
'channel_id': '668',
|
||||||
'uploader': 'JaredFPS',
|
'uploader': 'xQc',
|
||||||
'uploader_id': '26799',
|
'uploader_id': '676',
|
||||||
'upload_date': '20240402',
|
'upload_date': '20240724',
|
||||||
'timestamp': 1712097108,
|
'timestamp': 1721796562,
|
||||||
'duration': 33859.0,
|
'duration': 18566.0,
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'categories': ['Call of Duty: Warzone'],
|
'view_count': int,
|
||||||
|
'categories': ['VALORANT'],
|
||||||
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {'skip_download': 'm3u8'},
|
||||||
'skip_download': 'm3u8',
|
|
||||||
},
|
|
||||||
'expected_warnings': [r'impersonation'],
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
response = self._call_api(f'video/{video_id}', video_id)
|
response = self._call_api(f'v1/video/{video_id}', video_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
|
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
|
||||||
'title': traverse_obj(
|
**traverse_obj(response, {
|
||||||
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
|
'title': ('livestream', ('session_title', 'slug'), {str}, any),
|
||||||
'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
|
'description': ('livestream', 'channel', 'user', 'bio', {str}),
|
||||||
'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
|
'channel': ('livestream', 'channel', 'slug', {str}),
|
||||||
'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
|
'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
|
||||||
'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
|
'uploader': ('livestream', 'channel', 'user', 'username', {str}),
|
||||||
'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
|
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
|
||||||
'timestamp': unified_timestamp(response.get('created_at')),
|
'timestamp': ('created_at', {parse_iso8601}),
|
||||||
'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
|
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
|
||||||
'thumbnail': traverse_obj(
|
'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
|
||||||
response, ('livestream', 'thumbnail'), expected_type=url_or_none),
|
'categories': ('livestream', 'categories', ..., 'name', {str}),
|
||||||
'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
|
'view_count': ('views', {int_or_none}),
|
||||||
|
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class KickClipIE(KickBaseIE):
    """Extractor for Kick clips referenced through the ``clip=`` query parameter."""

    IE_NAME = 'kick:clips'
    _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
    _TESTS = [{
        'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
        'info_dict': {
            'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
            'ext': 'mp4',
            'title': 'Maddy detains Abd D:',
            'channel': 'mxddy',
            'channel_id': '133789',
            'uploader': 'AbdCreates',
            'uploader_id': '3309077',
            'thumbnail': r're:^https?://.*\.jpeg',
            'duration': 35,
            'timestamp': 1682481453,
            'upload_date': '20230426',
            'view_count': int,
            'like_count': int,
            'categories': ['VALORANT'],
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
        'info_dict': {
            'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
            'title': 'W jews',
            'ext': 'mp4',
            'channel': 'destiny',
            'channel_id': '1772249',
            'uploader': 'punished_furry',
            'uploader_id': '2027722',
            'duration': 49.0,
            'upload_date': '20230908',
            'timestamp': 1694150180,
            'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
            'view_count': int,
            'like_count': int,
            'categories': ['Just Chatting'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Fetch the clip record from the API and build the info dict for it."""
        clip_id = self._match_id(url)
        clip = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip']
        media_url = clip['clip_url']

        # An HLS manifest is expanded into its variants; any other URL is
        # handed over as a single direct format.
        formats = (
            self._extract_m3u8_formats(media_url, clip_id, 'mp4')
            if determine_ext(media_url) == 'm3u8'
            else [{'url': media_url}])

        metadata = traverse_obj(clip, {
            'title': ('title', {str}),
            'channel': ('channel', 'slug', {str}),
            'channel_id': ('channel', 'id', {int}, {str_or_none}),
            'uploader': ('creator', 'username', {str}),
            'uploader_id': ('creator', 'id', {int}, {str_or_none}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'duration': ('duration', {float_or_none}),
            'categories': ('category', 'name', {str}, all),
            'timestamp': ('created_at', {parse_iso8601}),
            'view_count': ('views', {int_or_none}),
            'like_count': ('likes', {int_or_none}),
            'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
        })

        return {
            'id': clip_id,
            'formats': formats,
            **metadata,
        }
|
||||||
|
|
114
yt_dlp/extractor/laracasts.py
Normal file
114
yt_dlp/extractor/laracasts.py
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_html_by_id,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
str_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class LaracastsBaseIE(InfoExtractor):
    """Helpers shared by the Laracasts episode and series extractors."""

    def _get_prop_data(self, url, display_id):
        """Return the ``props`` object embedded in the page's ``#app`` element.

        The element's ``data-page`` attribute is parsed as JSON and its
        ``props`` key is returned.
        """
        webpage = self._download_webpage(url, display_id)
        app_element = get_element_html_by_id('app', webpage)
        return traverse_obj(
            app_element, ({extract_attributes}, 'data-page', {json.loads}, 'props'))

    def _parse_episode(self, episode):
        """Turn one episode record into a url_transparent result pointing at Vimeo."""
        # Episode records without a Vimeo ID are reported as subscriber-only
        if not traverse_obj(episode, 'vimeoId'):
            self.raise_login_required('This video is only available for subscribers.')

        player_url = VimeoIE._smuggle_referrer(
            f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/')
        episode_info = traverse_obj(episode, {
            'id': ('id', {int}, {str_or_none}),
            'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
            'title': ('title', {clean_html}),
            'season_number': ('chapter', {int_or_none}),
            'episode_number': ('position', {int_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': ('largeThumbnail', {url_or_none}),
            'duration': ('length', {int_or_none}),
            'date': ('dateSegments', 'published', {unified_strdate}),
        })
        return self.url_result(player_url, VimeoIE, url_transparent=True, **episode_info)
|
||||||
|
|
||||||
|
|
||||||
|
class LaracastsIE(LaracastsBaseIE):
    """Extractor for a single Laracasts series episode page."""

    IE_NAME = 'laracasts'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
        'info_dict': {
            'id': '922040563',
            'title': 'Hello, Laravel',
            'ext': 'mp4',
            'duration': 519,
            'date': '20240312',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
            'description': 'md5:ddd658bb241975871d236555657e1dd1',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Episode 1',
            'uploader': 'Laracasts',
            'uploader_id': 'user20182673',
            'uploader_url': 'https://vimeo.com/user20182673',
        },
        'expected_warnings': ['Failed to parse XML'],  # TODO: Remove when vimeo extractor is fixed
    }]

    def _real_extract(self, url):
        """Parse the ``lesson`` record found in the episode page's props."""
        display_id = self._match_id(url)
        props = self._get_prop_data(url, display_id)
        return self._parse_episode(props['lesson'])
|
||||||
|
|
||||||
|
|
||||||
|
class LaracastsPlaylistIE(LaracastsBaseIE):
    """Extractor for a whole Laracasts series, returned as a playlist."""

    IE_NAME = 'laracasts:series'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
        'info_dict': {
            'title': '30 Days to Learn Laravel',
            'id': '210',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
            'duration': 30600.0,
            'modified_date': '20240511',
            'description': 'md5:27c260a1668a450984e8f901579912dd',
            'categories': ['Frameworks'],
            'tags': ['Laravel'],
            'display_id': '30-days-to-learn-laravel-11',
        },
        'playlist_count': 30,
    }]

    def _real_extract(self, url):
        """Build a playlist from the ``series`` record found in the page's props."""
        display_id = self._match_id(url)
        series = self._get_prop_data(url, display_id)['series']

        series_info = traverse_obj(series, {
            'title': ('title', {str}),
            'id': ('id', {int}, {str_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
            'duration': ('runTime', {parse_duration}),
            'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
            'tags': ('topics', ..., 'name', {str}),
            'modified_date': ('lastUpdated', {unified_strdate}),
        })
        metadata = {
            'display_id': display_id,
            **series_info,
        }

        # Only episodes carrying a truthy 'vimeoId' are turned into entries
        entries = traverse_obj(series, (
            'chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode}))
        return self.playlist_result(entries, **metadata)
|
78
yt_dlp/extractor/learningonscreen.py
Normal file
78
yt_dlp/extractor/learningonscreen.py
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
import functools
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_html_by_id,
|
||||||
|
join_nonempty,
|
||||||
|
parse_duration,
|
||||||
|
unified_timestamp,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class LearningOnScreenIE(InfoExtractor):
    """Extractor for programmes on learningonscreen.ac.uk (on-demand pages).

    A session cookie is required; see ``_real_initialize``.
    """

    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        """Require the ``PHPSESSID-BOB-LIVE`` cookie before any extraction is attempted."""
        if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
            self.raise_login_required(
                'Use --cookies for authentication. See '
                ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
                'for how to manually pass cookies', method=None)

    def _real_extract(self, url):
        """Return a single video, or a playlist when the page embeds several media elements.

        Raises:
            ExtractorError: if the page contains no HTML5 media entries.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # traverse_obj returns None (not an empty dict) when none of the
        # fields below can be extracted, e.g. if the 'programme-details'
        # element is absent. Fall back to {} so the .pop()/.update()/unpacking
        # below keep working and the title fallback remains reachable.
        details = traverse_obj(webpage, (
            {functools.partial(get_element_html_by_id, 'programme-details')}, {
                'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
                'timestamp': (
                    {functools.partial(get_element_by_class, 'broadcast-date')},
                    {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
                'duration': (
                    {functools.partial(get_element_by_class, 'prog-running-time')},
                    {clean_html}, {parse_duration}),
            })) or {}

        # Prefer the heading inside the details box; otherwise use the record
        # title stored on the 'add-to-existing-playlist' element.
        title = details.pop('title', None) or traverse_obj(webpage, (
            {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
            {extract_attributes}, 'data-record-title', {clean_html}))

        entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
            _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
        if not entries:
            raise ExtractorError('No video found')

        if len(entries) > 1:
            # The duration is attached to the playlist as a whole, not copied
            # onto each part, so it is removed from the per-entry details.
            duration = details.pop('duration', None)
            for idx, entry in enumerate(entries, start=1):
                entry.update(details)
                entry['id'] = join_nonempty(video_id, idx)
                entry['title'] = join_nonempty(title, idx)
            return self.playlist_result(entries, video_id, title, duration=duration)

        return {
            **entries[0],
            **details,
            'id': video_id,
            'title': title,
        }
|
|
@ -1,51 +1,35 @@
|
||||||
import random
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import xpath_text
|
|
||||||
|
|
||||||
|
|
||||||
class MatchTVIE(InfoExtractor):
    """Extractor for the Match TV live stream."""

    _VALID_URL = [
        r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
        r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
    ]
    _TESTS = [{
        'url': 'http://matchtv.ru/on-air/',
        'info_dict': {
            'id': 'matchtv-live',
            'ext': 'mp4',
            'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://video.matchtv.ru/iframe/channel/106',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Derive the HLS playlist URL from the channel iframe and return the live stream."""
        video_id = 'matchtv-live'
        # The same iframe page is downloaded regardless of which URL matched
        webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)

        config_url = self._html_search_regex(
            r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL')
        # The playlist URL is the config URL with '/feed/' swapped for
        # '/media/' and an '.m3u8' suffix appended.
        video_url = config_url.replace('/feed/', '/media/') + '.m3u8'

        return {
            'id': video_id,
            'title': 'Матч ТВ - Прямой эфир',
            'is_live': True,
            'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
        }
|
||||||
|
|
|
@ -133,7 +133,9 @@ def _real_extract(self, url):
|
||||||
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
|
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
|
||||||
|
|
||||||
player_data['video'] = player_data.pop('token')
|
player_data['video'] = player_data.pop('token')
|
||||||
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
|
player_page = self._download_webpage(
|
||||||
|
'https://player.mediaklikk.hu/playernew/player.php', video_id,
|
||||||
|
query=player_data, headers={'Referer': url})
|
||||||
player_json = self._search_json(
|
player_json = self._search_json(
|
||||||
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
|
||||||
playlist_url = traverse_obj(
|
playlist_url = traverse_obj(
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
|
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
|
||||||
|
|
||||||
|
@ -212,13 +213,14 @@ def _real_extract(self, url):
|
||||||
stream_type, 'type%u' % stream_type)
|
stream_type, 'type%u' % stream_type)
|
||||||
|
|
||||||
stream_formats = []
|
stream_formats = []
|
||||||
for unum, video_url in enumerate(video_urls):
|
for unum, video in enumerate(video_urls):
|
||||||
video_url = url_or_none(video_url.get('Location'))
|
video_url = url_or_none(video.get('Location'))
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
|
||||||
|
|
||||||
media_type = video_url.get('MediaType')
|
media_type = video.get('MediaType')
|
||||||
|
ext = mimetype2ext(video.get('MimeType'))
|
||||||
if media_type == 'SS':
|
if media_type == 'SS':
|
||||||
stream_formats.extend(self._extract_ism_formats(
|
stream_formats.extend(self._extract_ism_formats(
|
||||||
video_url, resource_id,
|
video_url, resource_id,
|
||||||
|
@ -229,15 +231,20 @@ def _real_extract(self, url):
|
||||||
video_url, resource_id,
|
video_url, resource_id,
|
||||||
mpd_id=f'{stream_id}-{snum}.{unum}',
|
mpd_id=f'{stream_id}-{snum}.{unum}',
|
||||||
fatal=False))
|
fatal=False))
|
||||||
|
elif ext in ('m3u', 'm3u8'):
|
||||||
|
stream_formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, resource_id,
|
||||||
|
m3u8_id=f'{stream_id}-{snum}.{unum}',
|
||||||
|
fatal=False))
|
||||||
else:
|
else:
|
||||||
stream_formats.append({
|
stream_formats.append({
|
||||||
'format_id': f'{stream_id}-{snum}.{unum}',
|
'format_id': f'{stream_id}-{snum}.{unum}',
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': mimetype2ext(video_url.get('MimeType')),
|
'ext': ext,
|
||||||
})
|
})
|
||||||
|
|
||||||
if stream.get('HasSlideContent', False):
|
images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
|
||||||
images = player_options['PlayerLayoutOptions']['Images']
|
if stream.get('HasSlideContent') and images:
|
||||||
stream_formats.append(self.__extract_slides(
|
stream_formats.append(self.__extract_slides(
|
||||||
stream_id=stream_id,
|
stream_id=stream_id,
|
||||||
snum=snum,
|
snum=snum,
|
||||||
|
|
|
@ -1,5 +1,14 @@
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none, traverse_obj, unified_timestamp
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
|
url_basename,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MicrosoftEmbedIE(InfoExtractor):
|
class MicrosoftEmbedIE(InfoExtractor):
|
||||||
|
@ -63,3 +72,250 @@ def _real_extract(self, url):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftMediusBaseIE(InfoExtractor):
    """Helpers shared by the Microsoft Medius-based extractors."""

    @staticmethod
    def _sub_to_dict(subtitle_list):
        """Group subtitle entries by their ``tag`` key.

        Args:
            subtitle_list: iterable of dicts; each may carry a ``tag`` key
                naming the group it belongs to.

        Returns:
            A dict mapping each tag (``'und'`` when an entry has none) to the
            list of its entries, with the ``tag`` key left out of each entry.
        """
        subtitles = {}
        for sub in subtitle_list:
            # Build a copy without 'tag' instead of pop()-ing it, so the
            # caller's dicts are not modified as a side effect.
            entry = {key: value for key, value in sub.items() if key != 'tag'}
            subtitles.setdefault(sub.get('tag', 'und'), []).append(entry)
        return subtitles

    def _extract_ism(self, ism_url, video_id):
        """Extract ISM formats and lower the language preference of non-English ones.

        A format keeps its default preference when its ``language`` is
        ``'eng'`` or its ``format_id`` contains ``'English'``; every other
        format gets ``language_preference`` set to -10.
        """
        formats = self._extract_ism_formats(ism_url, video_id)
        for fmt in formats:
            if fmt['language'] != 'eng' and 'English' not in fmt['format_id']:
                fmt['language_preference'] = -10
        return formats
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftMediusIE(MicrosoftMediusBaseIE):
    """Extractor for medius.microsoft.com embed pages."""

    _VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)'

    _TESTS = [{
        'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments',
            'description': 'md5:33c8e4facadc438613476eea24165f71',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:30',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3',
        'info_dict': {
            'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3',
            'ext': 'ismv',
            'title': 'Microsoft Build opening',
            'description': 'md5:43455096141077a1f23144cab8cec1cb',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:31',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c',
        'info_dict': {
            'id': '78493569-9b3b-4a85-a409-ee76e789e25c',
            'ext': 'ismv',
            'title': ' Anomaly Detection & Root cause at Edge',
            'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83',
            'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*',
            'subtitles': 'count:17',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7',
        'only_matching': True,
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c',
        'only_matching': True,
    }]

    def _extract_subtitle(self, webpage, video_id):
        """Collect subtitle tracks from the embed page, grouped by language tag.

        The ``captionsConfiguration`` JSON is tried first; when it yields no
        usable track, ``.vtt`` URLs found in ``var file = {...}`` snippets are
        used instead, with the tag taken from the file name.
        """
        captions_config = self._search_json(
            r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None)
        captions = traverse_obj(captions_config, (
            'languageList', lambda _, v: url_or_none(v['src']), {
                'url': 'src',
                'tag': ('srclang', {str}),
                'name': ('kind', {str}),
            }))

        if not captions:
            vtt_urls = re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)
            captions = [{
                'url': vtt_url,
                # The tag is the last '_'-separated piece of the name before '.vtt'
                'tag': url_basename(vtt_url).split('.vtt')[0].split('_')[-1],
            } for vtt_url in vtt_urls]

        return self._sub_to_dict(captions)

    def _real_extract(self, url):
        """Download the ``video-nc`` embed page for the ID and assemble the info dict."""
        video_id = self._match_id(url)
        # Whatever URL variant matched, the 'video-nc' page is the one fetched
        webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        ism_url = self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url')
        formats = self._extract_ism(ism_url, video_id)
        thumbnail = self._og_search_thumbnail(webpage)
        subtitles = self._extract_subtitle(webpage, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'subtitles': subtitles,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftLearnPlaylistIE(InfoExtractor):
    """Extractor for Microsoft Learn show and event listing pages (playlists)."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners',
        'info_dict': {
            'id': 'bash-for-beginners',
            'title': 'Bash for Beginners',
            'description': 'md5:16a91c07222117d1e00912f0dbc02c2c',
        },
        'playlist_count': 20,
    }, {
        'url': 'https://learn.microsoft.com/en-us/events/build-2022',
        'info_dict': {
            'id': 'build-2022',
            'title': 'Microsoft Build 2022 - Events',
            'description': 'md5:c16b43848027df837b22c6fbac7648d3',
        },
        'playlist_count': 201,
    }]

    def _entries(self, url_base, video_id):
        """Yield url results for every item, paging through the API with ``$skip``."""
        skip = 0
        while True:
            page = self._download_json(
                url_base, video_id, f'Downloading entries {skip}',
                query={
                    'locale': 'en-us',
                    '$skip': skip,
                })
            url_paths = traverse_obj(page, ('results', ..., 'url', {str}))
            yield from (
                self.url_result(f'https://learn.microsoft.com/en-us{url_path}')
                for url_path in url_paths)

            skip += len(url_paths)
            # Stop once the reported total is reached or a page yields nothing
            if skip >= page.get('count', 0) or not url_paths:
                break

    def _real_extract(self, url):
        """Return a playlist whose entries come from the content browser API."""
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
        webpage = self._download_webpage(url, playlist_id)

        metainfo = {
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
        }

        # 'shows' list their items as episodes; the other type as sessions
        if playlist_type == 'shows':
            sub_type = 'episodes'
        else:
            sub_type = 'sessions'

        url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}'
        entries = self._entries(url_base, playlist_id)
        return self.playlist_result(entries, playlist_id, **metainfo)
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE):
    """Extractor for a single episode of a Microsoft Learn show."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/',
        'info_dict': {
            'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c',
            'ext': 'ismv',
            'title': 'What is the Difference Between a Terminal and a Shell? (Part 2 of 20)',
            'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88',
            'timestamp': 1676339547,
            'upload_date': '20230214',
            'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png',
            'subtitles': 'count:14',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's ``entryId`` and build the info dict from the video API."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True)
        video_info = self._download_json(
            f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id)

        formats = self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id)
        # Only captions with a valid URL are kept; they are grouped by language
        captions = traverse_obj(video_info, (
            'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), {
                'tag': ('language', {str}),
                'url': 'url',
            }))
        subtitles = self._sub_to_dict(captions)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        api_metadata = traverse_obj(video_info, {
            'timestamp': ('createTime', {parse_iso8601}),
            'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}),
        })

        return {
            'id': entry_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': title,
            'description': description,
            **api_metadata,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftLearnSessionIE(InfoExtractor):
    """Extractor for a single session of a Microsoft Learn event."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments - Events',
            'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3',
            'timestamp': 1653408600,
            'upload_date': '20220524',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }]

    def _real_extract(self, url):
        """Hand the page's ``externalVideoUrl`` to MicrosoftMediusIE with page metadata."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        start_date = self._html_search_meta('startDate', webpage, 'startDate')
        metainfo = {
            'title': title,
            'description': description,
            'timestamp': parse_iso8601(start_date),
        }

        external_url = self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True)
        return self.url_result(
            external_url, url_transparent=True, ie=MicrosoftMediusIE, **metainfo)
|
||||||
|
|
||||||
|
|
||||||
|
class MicrosoftBuildIE(InfoExtractor):
    """Extractor for Microsoft Build sessions (a single session or the full list)."""

    _VALID_URL = [
        r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)',
        r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)',
    ]

    _TESTS = [{
        'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions',
        'info_dict': {
            'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554',
            'ext': 'ismv',
            'title': 'Microsoft Build opening keynote',
            'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655',
            'timestamp': 1716307200,
            'upload_date': '20240521',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }, {
        'url': 'https://build.microsoft.com/en-US/sessions',
        'info_dict': {
            'id': 'sessions',
        },
        'playlist_mincount': 418,
    }]

    def _real_extract(self, url):
        """Return every session as a playlist, or the one whose session ID matches the URL."""
        video_id = self._match_id(url)

        # The API returns all sessions at once; a single session is picked
        # out of the full list further down.
        sessions = self._download_json(
            'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info')

        entries = []
        for session in sessions:
            on_demand_url = session['onDemand']
            session_info = traverse_obj(session, {
                'id': ('sessionId', {str}),
                'title': ('title', {str}),
                'description': ('description', {str}),
                'timestamp': ('startDateTime', {parse_iso8601}),
            })
            entries.append(self.url_result(
                on_demand_url, ie=MicrosoftMediusIE, url_transparent=True, **session_info))

        if video_id == 'sessions':
            return self.playlist_result(entries, video_id)

        return traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False)
|
||||||
|
|
|
@ -1,188 +0,0 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
parse_duration,
|
|
||||||
smuggle_url,
|
|
||||||
unsmuggle_url,
|
|
||||||
xpath_text,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
    """Helpers shared by the Microsoft Virtual Academy extractors."""

    def _extract_base_url(self, course_id, display_id):
        """Download and return the JSON response of the products API for ``course_id``."""
        api_url = f'https://api-mlxprod.microsoft.com/services/products/anonymous/{course_id}'
        return self._download_json(api_url, display_id, 'Downloading course base URL')

    def _extract_chapter_and_title(self, title):
        """Split a ``"<number> | <title>"`` string into ``(chapter, title)``.

        Returns ``(None, None)`` for an empty title and ``(None, title)`` when
        the string does not contain the ``<number> | <title>`` pattern.
        """
        if not title:
            return None, None

        mobj = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
        if mobj is None:
            return None, title
        return int(mobj.group('chapter')), mobj.group('title')
|
|
||||||
|
|
||||||
|
|
||||||
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a single Microsoft Virtual Academy lesson video."""

    IE_NAME = 'mva'
    IE_DESC = 'Microsoft Virtual Academy videos'
    # Matches either the internal "mva:<course_id>:<video_id>" form or a
    # training-course page URL that names the lesson in its "?l=" parameter.
    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
        'info_dict': {
            'id': 'gfVXISmEB_6804984382',
            'ext': 'mp4',
            'title': 'Course Introduction',
            'formats': 'mincount:3',
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        },
    }, {
        'url': 'mva:11788:gfVXISmEB_6804984382',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict (title, formats, subtitles) for one lesson."""
        url, smuggled = unsmuggle_url(url, {})

        course_id, video_id = self._match_valid_url(url).group('course_id', 'id')

        # A base URL smuggled into the URL saves one API round trip.
        base_url = smuggled.get('base_url') or self._extract_base_url(course_id, video_id)

        settings_url = f'{base_url}/content/content_{video_id}/videosettings.xml?v=1'
        settings = self._download_xml(settings_url, video_id, 'Downloading video settings XML')

        raw_title = xpath_text(settings, './/Title', 'title', fatal=True)
        title = self._extract_chapter_and_title(raw_title)[1]

        formats = []
        for group in settings.findall('.//MediaSources'):
            is_smooth = group.get('videoType') == 'smoothstreaming'
            for node in group.findall('./MediaSource'):
                media_url = node.text
                if not (media_url and media_url.startswith('http')):
                    continue
                if is_smooth:
                    formats.extend(self._extract_ism_formats(
                        media_url, video_id, 'mss', fatal=False))
                    continue

                mode = node.get('videoMode')
                # A mode such as "720p" doubles as the height hint.
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', mode or '', 'height', default=None))

                # One entry is taken as the video codec, two as "audio,video";
                # any other count leaves both unset.
                codec_attr = node.get('codec')
                codec_parts = codec_attr.split(',') if codec_attr else []
                acodec = vcodec = None
                if len(codec_parts) == 2:
                    acodec, vcodec = codec_parts
                elif len(codec_parts) == 1:
                    vcodec = codec_parts[0]

                formats.append({
                    'url': media_url,
                    'format_id': mode,
                    'height': height,
                    'acodec': acodec,
                    'vcodec': vcodec,
                })

        # Every marker resource with a path is filed under the 'en' key.
        english_tracks = [{
            'url': f'{base_url}/{node.text}',
            'ext': node.get('type'),
        } for node in settings.findall('.//MarkerResourceSource') if node.text]
        subtitles = {'en': english_tracks} if english_tracks else {}

        return {
            'id': video_id,
            'title': title,
            'subtitles': subtitles,
            'formats': formats,
        }
|
|
||||||
|
|
||||||
|
|
||||||
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
    """Playlist extractor that lists the video lessons of a whole course."""

    IE_NAME = 'mva:course'
    IE_DESC = 'Microsoft Virtual Academy courses'
    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'info_dict': {
            'id': '11788',
            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
        },
        'playlist_count': 36,
    }, {
        # with emphasized chapters
        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
        'info_dict': {
            'id': '16335',
            'title': 'Developing Windows 10 Games with Construct 2',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'only_matching': True,
    }, {
        'url': 'mva:course:11788',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that the single-video extractor already accepts."""
        if MicrosoftVirtualAcademyIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one url_transparent entry per video lesson."""
        course_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        base_url = self._extract_base_url(course_id, display_id)

        manifest_url = f'{base_url}/imsmanifestlite.json'
        manifest = self._download_json(
            manifest_url, display_id, 'Downloading course manifest JSON')['manifest']

        organization = manifest['organizations']['organization'][0]

        entries = []
        for chapter in organization['item']:
            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
            chapter_fields = {
                'chapter': chapter_title,
                'chapter_number': chapter_number,
                'chapter_id': chapter.get('@identifier'),
            }
            for lesson in chapter.get('item', []):
                lesson_id = lesson.get('@identifier')
                if not lesson_id:
                    continue
                metadata = lesson.get('resource', {}).get('metadata') or {}
                # Only items flagged as videos become playlist entries.
                if metadata.get('learningresourcetype') != 'Video':
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    # Pass the base URL along so the video extractor can skip its own lookup.
                    'url': smuggle_url(
                        f'mva:{course_id}:{lesson_id}', {'base_url': base_url}),
                    'title': self._extract_chapter_and_title(lesson.get('title'))[1],
                    'description': metadata.get('description'),
                    'duration': parse_duration(metadata.get('duration')),
                    **chapter_fields,
                })

        playlist_title = organization.get('title')
        if not playlist_title:
            playlist_title = manifest.get('metadata', {}).get('title')

        return self.playlist_result(entries, course_id, playlist_title)
|
|
|
@ -1,17 +1,23 @@
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
jwt_decode_hs256,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
traverse_obj,
|
|
||||||
try_get,
|
try_get,
|
||||||
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class MLBBaseIE(InfoExtractor):
|
class MLBBaseIE(InfoExtractor):
|
||||||
|
@ -275,76 +281,225 @@ def _download_video_data(self, display_id):
|
||||||
class MLBTVIE(InfoExtractor):
|
class MLBTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
||||||
_NETRC_MACHINE = 'mlb'
|
_NETRC_MACHINE = 'mlb'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '661581',
|
'id': '661581',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
||||||
|
'release_date': '20220702',
|
||||||
|
'release_timestamp': 1656792300,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {'skip_download': 'm3u8'},
|
||||||
'skip_download': True,
|
}, {
|
||||||
|
# makeup game: has multiple dates, need to avoid games with 'rescheduleDate'
|
||||||
|
'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '747039',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles',
|
||||||
|
'release_date': '20240729',
|
||||||
|
'release_timestamp': 1722280200,
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
_GRAPHQL_INIT_QUERY = '''\
|
||||||
|
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
|
||||||
|
initSession(device: $device, clientType: $clientType, experience: $experience) {
|
||||||
|
deviceId
|
||||||
|
sessionId
|
||||||
|
entitlements {
|
||||||
|
code
|
||||||
|
}
|
||||||
|
location {
|
||||||
|
countryCode
|
||||||
|
regionName
|
||||||
|
zipCode
|
||||||
|
latitude
|
||||||
|
longitude
|
||||||
|
}
|
||||||
|
clientExperience
|
||||||
|
features
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_GRAPHQL_PLAYBACK_QUERY = '''\
|
||||||
|
mutation initPlaybackSession(
|
||||||
|
$adCapabilities: [AdExperienceType]
|
||||||
|
$mediaId: String!
|
||||||
|
$deviceId: String!
|
||||||
|
$sessionId: String!
|
||||||
|
$quality: PlaybackQuality
|
||||||
|
) {
|
||||||
|
initPlaybackSession(
|
||||||
|
adCapabilities: $adCapabilities
|
||||||
|
mediaId: $mediaId
|
||||||
|
deviceId: $deviceId
|
||||||
|
sessionId: $sessionId
|
||||||
|
quality: $quality
|
||||||
|
) {
|
||||||
|
playbackSessionId
|
||||||
|
playback {
|
||||||
|
url
|
||||||
|
token
|
||||||
|
expiration
|
||||||
|
cdn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_APP_VERSION = '7.8.2'
|
||||||
|
_device_id = None
|
||||||
|
_session_id = None
|
||||||
_access_token = None
|
_access_token = None
|
||||||
|
_token_expiry = 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _api_headers(self):
|
||||||
|
if (self._token_expiry - 120) <= time.time():
|
||||||
|
self.write_debug('Access token has expired; re-logging in')
|
||||||
|
self._perform_login(*self._get_login_info())
|
||||||
|
return {'Authorization': f'Bearer {self._access_token}'}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if not self._access_token:
|
if not self._access_token:
|
||||||
self.raise_login_required(
|
self.raise_login_required(
|
||||||
'All videos are only available to registered users', method='password')
|
'All videos are only available to registered users', method='password')
|
||||||
|
|
||||||
|
def _set_device_id(self, username):
|
||||||
|
if not self._device_id:
|
||||||
|
self._device_id = self.cache.load(
|
||||||
|
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
|
||||||
|
if self._device_id:
|
||||||
|
return
|
||||||
|
self._device_id = str(uuid.uuid4())
|
||||||
|
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
|
try:
|
||||||
access_token = self._download_json(
|
|
||||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
|
||||||
headers={
|
|
||||||
'User-Agent': 'okhttp/3.12.1',
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
|
||||||
}, data=data.encode())['access_token']
|
|
||||||
|
|
||||||
entitlement = self._download_webpage(
|
|
||||||
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
|
|
||||||
headers={
|
|
||||||
'User-Agent': 'okhttp/3.12.1',
|
|
||||||
'Authorization': f'Bearer {access_token}',
|
|
||||||
})
|
|
||||||
|
|
||||||
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
|
|
||||||
self._access_token = self._download_json(
|
self._access_token = self._download_json(
|
||||||
'https://us.edge.bamgrid.com/token', None,
|
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||||
headers={
|
'Logging in', 'Unable to log in', headers={
|
||||||
'Accept': 'application/json',
|
'User-Agent': 'okhttp/3.12.1',
|
||||||
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
}, data=data.encode())['access_token']
|
}, data=urlencode_postdata({
|
||||||
|
'grant_type': 'password',
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
'scope': 'openid offline_access',
|
||||||
|
'client_id': '0oa3e1nutA1HLzAKG356',
|
||||||
|
}))['access_token']
|
||||||
|
except ExtractorError as error:
|
||||||
|
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
|
||||||
|
raise ExtractorError('Invalid username or password', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||||
|
self._set_device_id(username)
|
||||||
|
|
||||||
|
self._session_id = self._call_api({
|
||||||
|
'operationName': 'initSession',
|
||||||
|
'query': self._GRAPHQL_INIT_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'device': {
|
||||||
|
'appVersion': self._APP_VERSION,
|
||||||
|
'deviceFamily': 'desktop',
|
||||||
|
'knownDeviceId': self._device_id,
|
||||||
|
'languagePreference': 'ENGLISH',
|
||||||
|
'manufacturer': '',
|
||||||
|
'model': '',
|
||||||
|
'os': '',
|
||||||
|
'osVersion': '',
|
||||||
|
},
|
||||||
|
'clientType': 'WEB',
|
||||||
|
},
|
||||||
|
}, None, 'session ID')['data']['initSession']['sessionId']
|
||||||
|
|
||||||
|
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
|
||||||
|
return self._download_json(
|
||||||
|
'https://media-gateway.mlb.com/graphql', video_id,
|
||||||
|
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
|
||||||
|
headers={
|
||||||
|
**self._api_headers,
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'x-client-name': 'WEB',
|
||||||
|
'x-client-version': self._APP_VERSION,
|
||||||
|
}, data=json.dumps(data, separators=(',', ':')).encode())
|
||||||
|
|
||||||
|
def _extract_formats_and_subtitles(self, broadcast, video_id):
    """Open a playback session for *broadcast* and extract its HLS formats.

    Returns ``(formats, subtitles)``. When the API response lacks a playback
    URL or token, a warning is reported and ``([], {})`` is returned, except
    for errors containing 'not entitled', which raise an expected
    ExtractorError.
    """
    feed = traverse_obj(broadcast, ('homeAway', {str.title}))
    medium = traverse_obj(broadcast, ('type', {str}))
    language = traverse_obj(broadcast, ('language', {str.lower}))
    format_id = join_nonempty(feed, medium, language)

    session_variables = {
        'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
        'deviceId': self._device_id,
        'mediaId': broadcast['mediaId'],
        'quality': 'PLACEHOLDER',
        'sessionId': self._session_id,
    }
    response = self._call_api({
        'operationName': 'initPlaybackSession',
        'query': self._GRAPHQL_PLAYBACK_QUERY,
        'variables': session_variables,
    }, video_id, f'{format_id} broadcast JSON', fatal=False)

    playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
    m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
    token = traverse_obj(playback, ('token', {str}))

    if m3u8_url and token:
        # The token is stripped from the URL path and sent as a header instead;
        # the same header is attached to each format for its later requests.
        cdn_headers = {'x-cdn-token': token}
        tokenless_url = m3u8_url.replace(f'/{token}/', '/')
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            tokenless_url, video_id, 'mp4',
            m3u8_id=format_id, fatal=False, headers=cdn_headers)

        default_note = join_nonempty(feed, medium, delim=' ')
        for fmt in fmts:
            fmt['http_headers'] = cdn_headers
            fmt.setdefault('format_note', default_note)
            fmt.setdefault('language', language)
            # Boost English audio-only formats
            if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
                fmt['source_preference'] = 10

        return fmts, subs

    errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
    if 'not entitled' in errors:
        raise ExtractorError(errors, expected=True)
    if errors:  # Only warn when 'blacked out' since radio formats are available
        self.report_warning(f'API returned errors for {format_id}: {errors}')
    else:
        self.report_warning(f'No formats available for {format_id} broadcast; skipping')
    return [], {}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
airings = self._download_json(
|
data = self._download_json(
|
||||||
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
|
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
|
||||||
video_id)['data']['Airings']
|
'gamePk': video_id,
|
||||||
|
'hydrate': 'broadcasts(all),statusFlags',
|
||||||
|
})
|
||||||
|
metadata = traverse_obj(data, (
|
||||||
|
'dates', ..., 'games',
|
||||||
|
lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any))
|
||||||
|
|
||||||
|
broadcasts = traverse_obj(metadata, (
|
||||||
|
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for airing in airings:
|
for broadcast in broadcasts:
|
||||||
m3u8_url = self._download_json(
|
fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
|
||||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
formats.extend(fmts)
|
||||||
headers={
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
'Authorization': self._access_token,
|
|
||||||
'Accept': 'application/vnd.media-service+json; version=2',
|
|
||||||
})['stream']['complete']
|
|
||||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage')))
|
|
||||||
formats.extend(f)
|
|
||||||
self._merge_subtitles(s, target=subtitles)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
|
'title': join_nonempty(
|
||||||
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
|
traverse_obj(metadata, ('officialDate', {str})),
|
||||||
|
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
|
||||||
|
delim=' - '),
|
||||||
|
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
|
||||||
|
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,40 +5,104 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
determine_ext,
|
clean_html,
|
||||||
int_or_none,
|
extract_attributes,
|
||||||
try_get,
|
get_element_by_class,
|
||||||
|
get_element_html_by_id,
|
||||||
|
parse_count,
|
||||||
|
remove_end,
|
||||||
|
update_url,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MurrtubeIE(InfoExtractor):
|
class MurrtubeIE(InfoExtractor):
|
||||||
_WORKING = False
|
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
murrtube:|
|
murrtube:|
|
||||||
https?://murrtube\.net/videos/(?P<slug>[a-z0-9\-]+)\-
|
https?://murrtube\.net/(?:v/|videos/(?P<slug>[a-z0-9-]+?)-)
|
||||||
)
|
)
|
||||||
(?P<id>[a-f0-9]{8}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{4}\-[a-f0-9]{12})
|
(?P<id>[A-Z0-9]{4}|[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})
|
||||||
'''
|
'''
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
'url': 'https://murrtube.net/videos/inferno-x-skyler-148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
||||||
'md5': '169f494812d9a90914b42978e73aa690',
|
'md5': '70380878a77e8565d4aea7f68b8bbb35',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '148b6f2a-fdcc-4902-affe-9c0f41aaaca0',
|
'id': 'ca885d8456b95de529b6723b158032e11115d',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Inferno X Skyler',
|
'title': 'Inferno X Skyler',
|
||||||
'description': 'Humping a very good slutty sheppy (roomate)',
|
'description': 'Humping a very good slutty sheppy (roomate)',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 284,
|
|
||||||
'uploader': 'Inferno Wolf',
|
'uploader': 'Inferno Wolf',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'thumbnail': 'https://storage.murrtube.net/murrtube-production/ekbs3zcfvuynnqfx72nn2tkokvsd',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'],
|
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://murrtube.net/v/0J2Q',
|
||||||
|
'md5': '31262f6ac56f0ca75e5a54a0f3fefcb6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8442998c52134968d9caa36e473e1a6bac6ca',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Hayel',
|
||||||
|
'title': 'Who\'s in charge now?',
|
||||||
|
'description': 'md5:795791e97e5b0f1805ea84573f02a997',
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': 'https://storage.murrtube.net/murrtube-production/fb1ojjwiucufp34ya6hxu5vfqi5s',
|
||||||
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_count(self, name, html):
    """Parse the number shown before the ``<span>`` labelled *name* in *html*.

    The regex search falls back to ``None`` when the counter is not found;
    that value is handed to ``parse_count`` as well.
    """
    counter_re = rf'([\d,]+)\s+<span[^>]*>{name}</span>'
    raw_count = self._search_regex(counter_re, html, name, default=None)
    return parse_count(raw_count)
|
||||||
|
|
||||||
|
def _real_initialize(self):
    """Fetch the homepage and post its hidden form inputs to the age check."""
    homepage = self._download_webpage(
        'https://murrtube.net', None, note='Getting session token')
    # The hidden inputs of the homepage are sent back as the form body.
    age_check_form = urlencode_postdata(self._hidden_inputs(homepage))
    self._request_webpage(
        'https://murrtube.net/accept_age_check', None, 'Setting age cookie',
        data=age_check_form)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract one video from its page.

    The final video ID is taken from the HLS playlist URL found in the
    ``data-url`` attribute of the page's ``video`` element, not from the
    page URL.

    Raises ExtractorError for "murrtube:"-prefixed URLs, which are not
    supported by this page-based extraction.
    """
    # The ID captured by _VALID_URL can never start with the scheme prefix,
    # so the unsupported "murrtube:<id>" form must be detected on the URL itself.
    if url.startswith('murrtube:'):
        raise ExtractorError('Support for murrtube: prefix URLs is broken')
    video_id = self._match_id(url)
    video_page = self._download_webpage(url, video_id)
    video_attrs = extract_attributes(get_element_html_by_id('video', video_page))
    # Drop the query string from the playlist URL
    playlist = update_url(video_attrs['data-url'], query=None)
    video_id = self._search_regex(r'/([\da-f]+)/index.m3u8', playlist, 'video id')

    return {
        'id': video_id,
        'title': remove_end(self._og_search_title(video_page), ' - Murrtube'),
        'age_limit': 18,
        'formats': self._extract_m3u8_formats(playlist, video_id, 'mp4'),
        'description': self._og_search_description(video_page),
        # An absent og:image becomes '' and is then normalized to None
        'thumbnail': update_url(self._og_search_thumbnail(video_page, default=''), query=None) or None,
        'uploader': clean_html(get_element_by_class('pl-1 is-size-6 has-text-lighter', video_page)),
        'view_count': self._extract_count('Views', video_page),
        'like_count': self._extract_count('Likes', video_page),
        'comment_count': self._extract_count('Comments', video_page),
    }
|
||||||
|
|
||||||
|
|
||||||
|
class MurrtubeUserIE(InfoExtractor):
|
||||||
|
_WORKING = False
|
||||||
|
IE_DESC = 'Murrtube user profile'
|
||||||
|
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://murrtube.net/stormy',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'stormy',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 27,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 10
|
||||||
|
|
||||||
def _download_gql(self, video_id, op, note=None, fatal=True):
|
def _download_gql(self, video_id, op, note=None, fatal=True):
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
'https://murrtube.net/graphql',
|
'https://murrtube.net/graphql',
|
||||||
|
@ -46,73 +110,6 @@ def _download_gql(self, video_id, op, note=None, fatal=True):
|
||||||
headers={'Content-Type': 'application/json'})
|
headers={'Content-Type': 'application/json'})
|
||||||
return result['data']
|
return result['data']
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Query the 'Medium' GraphQL operation and map its fields to an info dict."""
    video_id = self._match_id(url)
    gql_request = {
        'operationName': 'Medium',
        'variables': {
            'id': video_id,
        },
        'query': '''\
query Medium($id: ID!) {
medium(id: $id) {
title
description
key
duration
commentsCount
likesCount
viewsCount
thumbnailKey
tagList
user {
name
__typename
}
__typename
}
}'''}
    meta = self._download_gql(video_id, gql_request)['medium']

    # Media and thumbnail keys are appended to this storage prefix.
    storage_url = 'https://storage.murrtube.net/murrtube/'
    format_url = storage_url + meta.get('key', '')
    thumbnail = storage_url + meta.get('thumbnailKey', '')

    if determine_ext(format_url) != 'm3u8':
        formats = [{'url': format_url}]
    else:
        formats = self._extract_m3u8_formats(
            format_url, video_id, 'mp4', entry_protocol='m3u8_native', fatal=False)

    uploader = try_get(meta, lambda x: x['user']['name'])
    duration = int_or_none(meta.get('duration'))

    return {
        'id': video_id,
        'title': meta.get('title'),
        'description': meta.get('description'),
        'formats': formats,
        'thumbnail': thumbnail,
        'duration': duration,
        'uploader': uploader,
        'view_count': meta.get('viewsCount'),
        'like_count': meta.get('likesCount'),
        'comment_count': meta.get('commentsCount'),
        'tags': meta.get('tagList'),
        'age_limit': 18,
    }
|
|
||||||
|
|
||||||
|
|
||||||
class MurrtubeUserIE(MurrtubeIE): # XXX: Do not subclass from concrete IE
|
|
||||||
_WORKING = False
|
|
||||||
IE_DESC = 'Murrtube user profile'
|
|
||||||
_VALID_URL = r'https?://murrtube\.net/(?P<id>[^/]+)$'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://murrtube.net/stormy',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'stormy',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 27,
|
|
||||||
}
|
|
||||||
_PAGE_SIZE = 10
|
|
||||||
|
|
||||||
def _fetch_page(self, username, user_id, page):
|
def _fetch_page(self, username, user_id, page):
|
||||||
data = self._download_gql(username, {
|
data = self._download_gql(username, {
|
||||||
'operationName': 'Media',
|
'operationName': 'Media',
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
@ -498,10 +499,8 @@ def _real_extract(self, url):
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(va.get('bitrate'), 1000)
|
tbr = int_or_none(va.get('bitrate'), 1000)
|
||||||
if tbr:
|
|
||||||
format_id += f'-{tbr}'
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': join_nonempty(format_id, tbr),
|
||||||
'url': public_url,
|
'url': public_url,
|
||||||
'width': int_or_none(va.get('width')),
|
'width': int_or_none(va.get('width')),
|
||||||
'height': int_or_none(va.get('height')),
|
'height': int_or_none(va.get('height')),
|
||||||
|
|
|
@ -22,12 +22,22 @@
|
||||||
|
|
||||||
|
|
||||||
class NetEaseMusicBaseIE(InfoExtractor):
|
class NetEaseMusicBaseIE(InfoExtractor):
|
||||||
_FORMATS = ['bMusic', 'mMusic', 'hMusic']
|
# XXX: _extract_formats logic depends on the order of the levels in each tier
|
||||||
|
_LEVELS = (
|
||||||
|
'standard', # free tier; 标准; 128kbps mp3 or aac
|
||||||
|
'higher', # free tier; 192kbps mp3 or aac
|
||||||
|
'exhigh', # free tier; 极高 (HQ); 320kbps mp3 or aac
|
||||||
|
'lossless', # VIP tier; 无损 (SQ); 48kHz/16bit flac
|
||||||
|
'hires', # VIP tier; 高解析度无损 (Hi-Res); 192kHz/24bit flac
|
||||||
|
'jyeffect', # VIP tier; 高清臻音 (Spatial Audio); 96kHz/24bit flac
|
||||||
|
'jymaster', # SVIP tier; 超清母带 (Master); 192kHz/24bit flac
|
||||||
|
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
|
||||||
|
)
|
||||||
_API_BASE = 'http://music.163.com/api/'
|
_API_BASE = 'http://music.163.com/api/'
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def kilo_or_none(value):
|
def _kilo_or_none(value):
|
||||||
return int_or_none(value, scale=1000)
|
return int_or_none(value, scale=1000)
|
||||||
|
|
||||||
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
def _create_eapi_cipher(self, api_path, query_body, cookies):
|
||||||
|
@ -66,45 +76,43 @@ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
|
||||||
**headers,
|
**headers,
|
||||||
}, **kwargs)
|
}, **kwargs)
|
||||||
|
|
||||||
def _call_player_api(self, song_id, bitrate):
|
def _call_player_api(self, song_id, level):
|
||||||
return self._download_eapi_json(
|
return self._download_eapi_json(
|
||||||
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate},
|
'/song/enhance/player/url/v1', song_id,
|
||||||
note=f'Downloading song URL info: bitrate {bitrate}')
|
{'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'},
|
||||||
|
note=f'Downloading song URL info: level {level}')
|
||||||
|
|
||||||
def extract_formats(self, info):
|
def _extract_formats(self, info):
|
||||||
err = 0
|
|
||||||
formats = []
|
formats = []
|
||||||
song_id = info['id']
|
song_id = info['id']
|
||||||
for song_format in self._FORMATS:
|
for level in self._LEVELS:
|
||||||
details = info.get(song_format)
|
song = traverse_obj(
|
||||||
if not details:
|
self._call_player_api(song_id, level), ('data', lambda _, v: url_or_none(v['url']), any))
|
||||||
|
if not song:
|
||||||
|
break # Media is not available due to removal or geo-restriction
|
||||||
|
actual_level = song.get('level')
|
||||||
|
if actual_level and actual_level != level:
|
||||||
|
if level in ('lossless', 'jymaster'):
|
||||||
|
break # We've already extracted the highest level of the user's account tier
|
||||||
continue
|
continue
|
||||||
bitrate = int_or_none(details.get('bitrate')) or 999000
|
|
||||||
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
|
|
||||||
song_url = song['url']
|
|
||||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': song_url,
|
'url': song['url'],
|
||||||
'format_id': song_format,
|
'format_id': level,
|
||||||
'asr': traverse_obj(details, ('sr', {int_or_none})),
|
'vcodec': 'none',
|
||||||
**traverse_obj(song, {
|
**traverse_obj(song, {
|
||||||
'ext': ('type', {str}),
|
'ext': ('type', {str}),
|
||||||
'abr': ('br', {self.kilo_or_none}),
|
'abr': ('br', {self._kilo_or_none}),
|
||||||
'filesize': ('size', {int_or_none}),
|
'filesize': ('size', {int_or_none}),
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
elif err == 0:
|
if not actual_level:
|
||||||
err = traverse_obj(song, ('code', {int})) or 0
|
break # Only 1 level is available if API does not return a value (netease:program)
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
if err != 0 and (err < 200 or err >= 400):
|
|
||||||
raise ExtractorError(f'No media links found (site code {err})', expected=True)
|
|
||||||
else:
|
|
||||||
self.raise_geo_restricted(
|
self.raise_geo_restricted(
|
||||||
'No media links found: probably due to geo restriction.', countries=['CN'])
|
'No media links found; possibly due to geo restriction', countries=['CN'])
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def query_api(self, endpoint, video_id, note):
|
def _query_api(self, endpoint, video_id, note):
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
|
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
|
||||||
code = traverse_obj(result, ('code', {int}))
|
code = traverse_obj(result, ('code', {int}))
|
||||||
|
@ -128,32 +136,29 @@ def _get_entries(self, songs_data, entry_keys=None, id_key='id', name_key='name'
|
||||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:song'
|
IE_NAME = 'netease:song'
|
||||||
IE_DESC = '网易云音乐'
|
IE_DESC = '网易云音乐'
|
||||||
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://music.163.com/#/song?id=548648087',
|
'url': 'https://music.163.com/#/song?id=550136151',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '548648087',
|
'id': '550136151',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '戒烟 (Live)',
|
'title': 'It\'s Ok (Live)',
|
||||||
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊',
|
'creators': 'count:10',
|
||||||
'timestamp': 1522944000,
|
'timestamp': 1522944000,
|
||||||
'upload_date': '20180405',
|
'upload_date': '20180405',
|
||||||
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c',
|
'description': 'md5:9fd07059c2ccee3950dc8363429a3135',
|
||||||
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
'duration': 197,
|
||||||
'duration': 256,
|
|
||||||
'thumbnail': r're:^http.*\.jpg',
|
'thumbnail': r're:^http.*\.jpg',
|
||||||
'album': '偶像练习生 表演曲目合集',
|
'album': '偶像练习生 表演曲目合集',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'album_artist': '偶像练习生',
|
'album_artists': ['偶像练习生'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'No lyrics.',
|
|
||||||
'url': 'http://music.163.com/song?id=17241424',
|
'url': 'http://music.163.com/song?id=17241424',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '17241424',
|
'id': '17241424',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Opus 28',
|
'title': 'Opus 28',
|
||||||
'creator': 'Dustin O\'Halloran',
|
|
||||||
'upload_date': '20080211',
|
'upload_date': '20080211',
|
||||||
'timestamp': 1202745600,
|
'timestamp': 1202745600,
|
||||||
'duration': 263,
|
'duration': 263,
|
||||||
|
@ -161,15 +166,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
'album': 'Piano Solos Vol. 2',
|
'album': 'Piano Solos Vol. 2',
|
||||||
'album_artist': 'Dustin O\'Halloran',
|
'album_artist': 'Dustin O\'Halloran',
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
|
'description': '[00:05.00]纯音乐,请欣赏\n',
|
||||||
|
'album_artists': ['Dustin O\'Halloran'],
|
||||||
|
'creators': ['Dustin O\'Halloran'],
|
||||||
|
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||||
'md5': '95826c73ea50b1c288b22180ec9e754d',
|
'md5': 'b896be78d8d34bd7bb665b26710913ff',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '95670',
|
'id': '95670',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '国际歌',
|
'title': '国际歌',
|
||||||
'creator': '马备',
|
|
||||||
'upload_date': '19911130',
|
'upload_date': '19911130',
|
||||||
'timestamp': 691516800,
|
'timestamp': 691516800,
|
||||||
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
||||||
|
@ -180,6 +188,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
'average_rating': int,
|
'average_rating': int,
|
||||||
'album': '红色摇滚',
|
'album': '红色摇滚',
|
||||||
'album_artist': '侯牧人',
|
'album_artist': '侯牧人',
|
||||||
|
'creators': ['马备'],
|
||||||
|
'album_artists': ['侯牧人'],
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://music.163.com/#/song?id=32102397',
|
'url': 'http://music.163.com/#/song?id=32102397',
|
||||||
|
@ -188,7 +198,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
'id': '32102397',
|
'id': '32102397',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Bad Blood',
|
'title': 'Bad Blood',
|
||||||
'creator': 'Taylor Swift / Kendrick Lamar',
|
'creators': ['Taylor Swift', 'Kendrick Lamar'],
|
||||||
'upload_date': '20150516',
|
'upload_date': '20150516',
|
||||||
'timestamp': 1431792000,
|
'timestamp': 1431792000,
|
||||||
'description': 'md5:21535156efb73d6d1c355f95616e285a',
|
'description': 'md5:21535156efb73d6d1c355f95616e285a',
|
||||||
|
@ -207,7 +217,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
'id': '22735043',
|
'id': '22735043',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '소원을 말해봐 (Genie)',
|
'title': '소원을 말해봐 (Genie)',
|
||||||
'creator': '少女时代',
|
'creators': ['少女时代'],
|
||||||
'upload_date': '20100127',
|
'upload_date': '20100127',
|
||||||
'timestamp': 1264608000,
|
'timestamp': 1264608000,
|
||||||
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
|
'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
|
||||||
|
@ -251,12 +261,12 @@ def _process_lyrics(self, lyrics_info):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
song_id = self._match_id(url)
|
song_id = self._match_id(url)
|
||||||
|
|
||||||
info = self.query_api(
|
info = self._query_api(
|
||||||
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
|
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
|
||||||
|
|
||||||
formats = self.extract_formats(info)
|
formats = self._extract_formats(info)
|
||||||
|
|
||||||
lyrics = self._process_lyrics(self.query_api(
|
lyrics = self._process_lyrics(self._query_api(
|
||||||
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
|
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
|
||||||
lyric_data = {
|
lyric_data = {
|
||||||
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
|
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
|
||||||
|
@ -267,14 +277,14 @@ def _real_extract(self, url):
|
||||||
'id': song_id,
|
'id': song_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
|
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
|
||||||
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None,
|
'creators': traverse_obj(info, ('artists', ..., 'name')) or None,
|
||||||
'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None,
|
'album_artists': traverse_obj(info, ('album', 'artists', ..., 'name')) or None,
|
||||||
**lyric_data,
|
**lyric_data,
|
||||||
**traverse_obj(info, {
|
**traverse_obj(info, {
|
||||||
'title': ('name', {str}),
|
'title': ('name', {str}),
|
||||||
'timestamp': ('album', 'publishTime', {self.kilo_or_none}),
|
'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
|
||||||
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
'thumbnail': ('album', 'picUrl', {url_or_none}),
|
||||||
'duration': ('duration', {self.kilo_or_none}),
|
'duration': ('duration', {self._kilo_or_none}),
|
||||||
'album': ('album', 'name', {str}),
|
'album': ('album', 'name', {str}),
|
||||||
'average_rating': ('score', {int_or_none}),
|
'average_rating': ('score', {int_or_none}),
|
||||||
}),
|
}),
|
||||||
|
@ -284,7 +294,7 @@ def _real_extract(self, url):
|
||||||
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:album'
|
IE_NAME = 'netease:album'
|
||||||
IE_DESC = '网易云音乐 - 专辑'
|
IE_DESC = '网易云音乐 - 专辑'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://music\.163\.com/(?:#/)?album\?id=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://music.163.com/#/album?id=133153666',
|
'url': 'https://music.163.com/#/album?id=133153666',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -294,7 +304,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
|
||||||
'description': '桃几2021年翻唱合集',
|
'description': '桃几2021年翻唱合集',
|
||||||
'thumbnail': r're:^http.*\.jpg',
|
'thumbnail': r're:^http.*\.jpg',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 13,
|
'playlist_mincount': 12,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://music.163.com/#/album?id=220780',
|
'url': 'http://music.163.com/#/album?id=220780',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -328,7 +338,7 @@ def _real_extract(self, url):
|
||||||
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:singer'
|
IE_NAME = 'netease:singer'
|
||||||
IE_DESC = '网易云音乐 - 歌手'
|
IE_DESC = '网易云音乐 - 歌手'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://music\.163\.com/(?:#/)?artist\?id=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'note': 'Singer has aliases.',
|
'note': 'Singer has aliases.',
|
||||||
'url': 'http://music.163.com/#/artist?id=10559',
|
'url': 'http://music.163.com/#/artist?id=10559',
|
||||||
|
@ -358,7 +368,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
singer_id = self._match_id(url)
|
singer_id = self._match_id(url)
|
||||||
|
|
||||||
info = self.query_api(
|
info = self._query_api(
|
||||||
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
|
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
|
||||||
|
|
||||||
name = join_nonempty(
|
name = join_nonempty(
|
||||||
|
@ -372,7 +382,7 @@ def _real_extract(self, url):
|
||||||
class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:playlist'
|
IE_NAME = 'netease:playlist'
|
||||||
IE_DESC = '网易云音乐 - 歌单'
|
IE_DESC = '网易云音乐 - 歌单'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://music.163.com/#/playlist?id=79177352',
|
'url': 'http://music.163.com/#/playlist?id=79177352',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -405,11 +415,15 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
|
||||||
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
|
'url': 'http://music.163.com/#/discover/toplist?id=3733003',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3733003',
|
'id': '3733003',
|
||||||
'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
|
'title': 're:韩国Melon排行榜周榜(?: [0-9]{4}-[0-9]{2}-[0-9]{2})?',
|
||||||
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
|
'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
|
||||||
|
'upload_date': '20200109',
|
||||||
|
'uploader_id': '2937386',
|
||||||
|
'tags': ['韩语', '榜单'],
|
||||||
|
'uploader': 'Melon榜单',
|
||||||
|
'timestamp': 1578569373,
|
||||||
},
|
},
|
||||||
'playlist_count': 50,
|
'playlist_count': 50,
|
||||||
'skip': 'Blocked outside Mainland China',
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -426,7 +440,7 @@ def _real_extract(self, url):
|
||||||
'tags': ('tags', ..., {str}),
|
'tags': ('tags', ..., {str}),
|
||||||
'uploader': ('creator', 'nickname', {str}),
|
'uploader': ('creator', 'nickname', {str}),
|
||||||
'uploader_id': ('creator', 'userId', {str_or_none}),
|
'uploader_id': ('creator', 'userId', {str_or_none}),
|
||||||
'timestamp': ('updateTime', {self.kilo_or_none}),
|
'timestamp': ('updateTime', {self._kilo_or_none}),
|
||||||
}))
|
}))
|
||||||
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
if traverse_obj(info, ('playlist', 'specialType')) == 10:
|
||||||
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
|
||||||
|
@ -437,7 +451,7 @@ def _real_extract(self, url):
|
||||||
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:mv'
|
IE_NAME = 'netease:mv'
|
||||||
IE_DESC = '网易云音乐 - MV'
|
IE_DESC = '网易云音乐 - MV'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://music\.163\.com/(?:#/)?mv\?id=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://music.163.com/#/mv?id=10958064',
|
'url': 'https://music.163.com/#/mv?id=10958064',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -445,7 +459,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '交换余生',
|
'title': '交换余生',
|
||||||
'description': 'md5:e845872cff28820642a2b02eda428fea',
|
'description': 'md5:e845872cff28820642a2b02eda428fea',
|
||||||
'creator': '林俊杰',
|
'creators': ['林俊杰'],
|
||||||
'upload_date': '20200916',
|
'upload_date': '20200916',
|
||||||
'thumbnail': r're:http.*\.jpg',
|
'thumbnail': r're:http.*\.jpg',
|
||||||
'duration': 364,
|
'duration': 364,
|
||||||
|
@ -460,7 +474,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '이럴거면 그러지말지',
|
'title': '이럴거면 그러지말지',
|
||||||
'description': '白雅言自作曲唱甜蜜爱情',
|
'description': '白雅言自作曲唱甜蜜爱情',
|
||||||
'creator': '白娥娟',
|
'creators': ['白娥娟'],
|
||||||
'upload_date': '20150520',
|
'upload_date': '20150520',
|
||||||
'thumbnail': r're:http.*\.jpg',
|
'thumbnail': r're:http.*\.jpg',
|
||||||
'duration': 216,
|
'duration': 216,
|
||||||
|
@ -468,12 +482,28 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
|
'skip': 'Blocked outside Mainland China',
|
||||||
|
}, {
|
||||||
|
'note': 'This MV has multiple creators.',
|
||||||
|
'url': 'https://music.163.com/#/mv?id=22593543',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '22593543',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '老北京杀器',
|
||||||
|
'creators': ['秃子2z', '辉子', 'Saber梁维嘉'],
|
||||||
|
'duration': 206,
|
||||||
|
'upload_date': '20240618',
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:http.*\.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mv_id = self._match_id(url)
|
mv_id = self._match_id(url)
|
||||||
|
|
||||||
info = self.query_api(
|
info = self._query_api(
|
||||||
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
|
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
|
||||||
|
|
||||||
formats = [
|
formats = [
|
||||||
|
@ -484,13 +514,13 @@ def _real_extract(self, url):
|
||||||
return {
|
return {
|
||||||
'id': mv_id,
|
'id': mv_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
|
||||||
**traverse_obj(info, {
|
**traverse_obj(info, {
|
||||||
'title': ('name', {str}),
|
'title': ('name', {str}),
|
||||||
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
|
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
|
||||||
'creator': ('artistName', {str}),
|
|
||||||
'upload_date': ('publishTime', {unified_strdate}),
|
'upload_date': ('publishTime', {unified_strdate}),
|
||||||
'thumbnail': ('cover', {url_or_none}),
|
'thumbnail': ('cover', {url_or_none}),
|
||||||
'duration': ('duration', {self.kilo_or_none}),
|
'duration': ('duration', {self._kilo_or_none}),
|
||||||
'view_count': ('playCount', {int_or_none}),
|
'view_count': ('playCount', {int_or_none}),
|
||||||
'like_count': ('likeCount', {int_or_none}),
|
'like_count': ('likeCount', {int_or_none}),
|
||||||
'comment_count': ('commentCount', {int_or_none}),
|
'comment_count': ('commentCount', {int_or_none}),
|
||||||
|
@ -501,7 +531,7 @@ def _real_extract(self, url):
|
||||||
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:program'
|
IE_NAME = 'netease:program'
|
||||||
IE_DESC = '网易云音乐 - 电台节目'
|
IE_DESC = '网易云音乐 - 电台节目'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://music.163.com/#/program?id=10109055',
|
'url': 'http://music.163.com/#/program?id=10109055',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -509,7 +539,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '不丹足球背后的故事',
|
'title': '不丹足球背后的故事',
|
||||||
'description': '喜马拉雅人的足球梦 ...',
|
'description': '喜马拉雅人的足球梦 ...',
|
||||||
'creator': '大话西藏',
|
'creators': ['大话西藏'],
|
||||||
'timestamp': 1434179287,
|
'timestamp': 1434179287,
|
||||||
'upload_date': '20150613',
|
'upload_date': '20150613',
|
||||||
'thumbnail': r're:http.*\.jpg',
|
'thumbnail': r're:http.*\.jpg',
|
||||||
|
@ -522,7 +552,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||||
'id': '10141022',
|
'id': '10141022',
|
||||||
'title': '滚滚电台的有声节目',
|
'title': '滚滚电台的有声节目',
|
||||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||||
'creator': '滚滚电台ORZ',
|
'creators': ['滚滚电台ORZ'],
|
||||||
'timestamp': 1434450733,
|
'timestamp': 1434450733,
|
||||||
'upload_date': '20150616',
|
'upload_date': '20150616',
|
||||||
'thumbnail': r're:http.*\.jpg',
|
'thumbnail': r're:http.*\.jpg',
|
||||||
|
@ -536,7 +566,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '滚滚电台的有声节目',
|
'title': '滚滚电台的有声节目',
|
||||||
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
|
||||||
'creator': '滚滚电台ORZ',
|
'creators': ['滚滚电台ORZ'],
|
||||||
'timestamp': 1434450733,
|
'timestamp': 1434450733,
|
||||||
'upload_date': '20150616',
|
'upload_date': '20150616',
|
||||||
'thumbnail': r're:http.*\.jpg',
|
'thumbnail': r're:http.*\.jpg',
|
||||||
|
@ -550,7 +580,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
program_id = self._match_id(url)
|
program_id = self._match_id(url)
|
||||||
|
|
||||||
info = self.query_api(
|
info = self._query_api(
|
||||||
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
|
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
|
||||||
|
|
||||||
metainfo = traverse_obj(info, {
|
metainfo = traverse_obj(info, {
|
||||||
|
@ -558,17 +588,17 @@ def _real_extract(self, url):
|
||||||
'description': ('description', {str}),
|
'description': ('description', {str}),
|
||||||
'creator': ('dj', 'brand', {str}),
|
'creator': ('dj', 'brand', {str}),
|
||||||
'thumbnail': ('coverUrl', {url_or_none}),
|
'thumbnail': ('coverUrl', {url_or_none}),
|
||||||
'timestamp': ('createTime', {self.kilo_or_none}),
|
'timestamp': ('createTime', {self._kilo_or_none}),
|
||||||
})
|
})
|
||||||
|
|
||||||
if not self._yes_playlist(
|
if not self._yes_playlist(
|
||||||
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
|
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
|
||||||
formats = self.extract_formats(info['mainSong'])
|
formats = self._extract_formats(info['mainSong'])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': str(info['mainSong']['id']),
|
'id': str(info['mainSong']['id']),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})),
|
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
|
||||||
**metainfo,
|
**metainfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -579,7 +609,7 @@ def _real_extract(self, url):
|
||||||
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:djradio'
|
IE_NAME = 'netease:djradio'
|
||||||
IE_DESC = '网易云音乐 - 电台'
|
IE_DESC = '网易云音乐 - 电台'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://music\.163\.com/(?:#/)?djradio\?id=(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://music.163.com/#/djradio?id=42',
|
'url': 'http://music.163.com/#/djradio?id=42',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -597,7 +627,7 @@ def _real_extract(self, url):
|
||||||
metainfo = {}
|
metainfo = {}
|
||||||
entries = []
|
entries = []
|
||||||
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
|
for offset in itertools.count(start=0, step=self._PAGE_SIZE):
|
||||||
info = self.query_api(
|
info = self._query_api(
|
||||||
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
|
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
|
||||||
dj_id, note=f'Downloading dj programs - {offset}')
|
dj_id, note=f'Downloading dj programs - {offset}')
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
@ -41,7 +42,7 @@ def _real_extract(self, url):
|
||||||
else:
|
else:
|
||||||
height = int_or_none(playback.get('height'))
|
height = int_or_none(playback.get('height'))
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': playback.get('name', 'http' + (f'-{height}p' if height else '')),
|
'format_id': playback.get('name') or join_nonempty('http', height and f'{height}p'),
|
||||||
'url': playback_url,
|
'url': playback_url,
|
||||||
'width': int_or_none(playback.get('width')),
|
'width': int_or_none(playback.get('width')),
|
||||||
'height': height,
|
'height': height,
|
||||||
|
|
|
@ -43,15 +43,17 @@ def _parse_video_data(self, container, extract_formats=True):
|
||||||
is_live = media.get('media_status') == 'RUNNING'
|
is_live = media.get('media_status') == 'RUNNING'
|
||||||
|
|
||||||
formats, subtitles = None, None
|
formats, subtitles = None, None
|
||||||
|
headers = {'Referer': 'https://nuum.ru/'}
|
||||||
if extract_formats:
|
if extract_formats:
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
media_url, video_id, 'mp4', live=is_live)
|
media_url, video_id, 'mp4', live=is_live, headers=headers)
|
||||||
|
|
||||||
return filter_dict({
|
return filter_dict({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'http_headers': headers,
|
||||||
**traverse_obj(container, {
|
**traverse_obj(container, {
|
||||||
'title': ('media_container_name', {str}),
|
'title': ('media_container_name', {str}),
|
||||||
'description': ('media_container_description', {str}),
|
'description': ('media_container_description', {str}),
|
||||||
|
@ -78,7 +80,7 @@ class NuumMediaIE(NuumBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
|
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
|
||||||
'md5': 'f1d9118a30403e32b702a204eb03aca3',
|
'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1567547',
|
'id': '1567547',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
|
|
@ -1,9 +1,19 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none, try_get
|
from ..networking.exceptions import HTTPError
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_qs,
|
||||||
|
try_get,
|
||||||
|
update_url,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class OlympicsReplayIE(InfoExtractor):
|
class OlympicsReplayIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)'
|
_VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
|
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -11,26 +21,105 @@ class OlympicsReplayIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
|
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
|
||||||
'upload_date': '20210801',
|
'upload_date': '20210801',
|
||||||
'timestamp': 1627783200,
|
'timestamp': 1627797600,
|
||||||
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
|
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
|
||||||
'uploader': 'International Olympic Committee',
|
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2',
|
||||||
},
|
'duration': 7017.0,
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
|
'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1',
|
||||||
|
'upload_date': '20240517',
|
||||||
|
'timestamp': 1715948200,
|
||||||
|
'description': 'md5:f63d728a41270ec628f6ac33ce471bb1',
|
||||||
|
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1',
|
||||||
|
'duration': 1321.0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024',
|
||||||
|
'upload_date': '20240727',
|
||||||
|
'timestamp': 1722066600,
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL',
|
||||||
|
}, {
|
||||||
|
'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:c7cfbc9918636a98e66400a812e4d407',
|
||||||
|
'upload_date': '20240729',
|
||||||
|
'timestamp': 1722288600,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
|
def _extract_from_nextjs_data(self, webpage, video_id):
|
||||||
|
data = traverse_obj(self._search_nextjs_data(webpage, video_id, default={}), (
|
||||||
|
'props', 'pageProps', 'page', 'items',
|
||||||
|
lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any))
|
||||||
|
if not data:
|
||||||
|
return None
|
||||||
|
|
||||||
|
geo_countries = traverse_obj(data, ('countries', ..., {str}))
|
||||||
|
if traverse_obj(data, ('geoRestrictedVideo', {bool})):
|
||||||
|
self.raise_geo_restricted(countries=geo_countries)
|
||||||
|
|
||||||
|
is_live = traverse_obj(data, ('streamingStatus', {str})) == 'LIVE'
|
||||||
|
m3u8_url = traverse_obj(data, ('videoUrl', {url_or_none})) or data['streamUrl']
|
||||||
|
tokenized_url = self._tokenize_url(m3u8_url, data['jwtToken'], is_live, video_id)
|
||||||
|
|
||||||
|
try:
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
tokenized_url, video_id, 'mp4', m3u8_id='hls')
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, HTTPError) and 'georestricted' in e.cause.msg:
|
||||||
|
self.raise_geo_restricted(countries=geo_countries)
|
||||||
|
raise
|
||||||
|
|
||||||
|
return {
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'is_live': is_live,
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'id': ('videoID', {str}),
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'timestamp': ('contentDate', {parse_iso8601}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _tokenize_url(self, url, token, is_live, video_id):
|
||||||
|
return self._download_json(
|
||||||
|
'https://metering.olympics.com/tokengenerator', video_id,
|
||||||
|
'Downloading tokenized m3u8 url', query={
|
||||||
|
**parse_qs(url),
|
||||||
|
'url': update_url(url, query=None),
|
||||||
|
'service-id': 'live' if is_live else 'vod',
|
||||||
|
'user-auth': token,
|
||||||
|
})['data']['url']
|
||||||
|
|
||||||
|
def _legacy_tokenize_url(self, url, video_id):
|
||||||
|
return self._download_json(
|
||||||
|
'https://olympics.com/tokenGenerator', video_id,
|
||||||
|
'Downloading legacy tokenized m3u8 url', query={'url': url})
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
if info := self._extract_from_nextjs_data(webpage, video_id):
|
||||||
|
return info
|
||||||
|
|
||||||
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
|
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
|
||||||
uuid = self._html_search_meta('episode_uid', webpage)
|
video_uuid = self._html_search_meta('episode_uid', webpage)
|
||||||
m3u8_url = self._html_search_meta('video_url', webpage)
|
m3u8_url = self._html_search_meta('video_url', webpage)
|
||||||
json_ld = self._search_json_ld(webpage, uuid)
|
json_ld = self._search_json_ld(webpage, video_uuid)
|
||||||
thumbnails_list = json_ld.get('image')
|
thumbnails_list = json_ld.get('image')
|
||||||
if not thumbnails_list:
|
if not thumbnails_list:
|
||||||
thumbnails_list = self._html_search_regex(
|
thumbnails_list = self._html_search_regex(
|
||||||
|
@ -48,12 +137,12 @@ def _real_extract(self, url):
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
|
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
|
||||||
})
|
})
|
||||||
m3u8_url = self._download_json(
|
|
||||||
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
|
self._legacy_tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': uuid,
|
'id': video_uuid,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|
|
@ -550,7 +550,8 @@ def _real_extract(self, url):
|
||||||
return self._extract_video_info(segment_id, selected_segment)
|
return self._extract_video_info(segment_id, selected_segment)
|
||||||
|
|
||||||
# Even some segmented videos have an unsegmented version available in API response root
|
# Even some segmented videos have an unsegmented version available in API response root
|
||||||
if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})):
|
if (self._configuration_arg('prefer_segments_playlist')
|
||||||
|
or not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none}))):
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
(self._extract_video_info(str(segment['id']), segment) for segment in segments),
|
(self._extract_video_info(str(segment['id']), segment) for segment in segments),
|
||||||
video_id, **self._parse_metadata(api_json), multi_video=True)
|
video_id, **self._parse_metadata(api_json), multi_video=True)
|
||||||
|
|
|
@ -316,7 +316,8 @@ def _real_extract(self, url):
|
||||||
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||||
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||||
if url_or_none(v_url) and self._request_webpage(
|
if url_or_none(v_url) and self._request_webpage(
|
||||||
v_url, video_id, 'Checking Vimeo embed URL', headers=headers, fatal=False, errnote=False):
|
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||||
|
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||||
VimeoIE, url_transparent=True))
|
VimeoIE, url_transparent=True))
|
||||||
|
|
|
@ -41,7 +41,7 @@ class PelotonIE(InfoExtractor):
|
||||||
}, 'params': {
|
}, 'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
'_skip': 'Account needed',
|
'skip': 'Account needed',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
|
'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -61,7 +61,7 @@ class PelotonIE(InfoExtractor):
|
||||||
}, 'params': {
|
}, 'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
'_skip': 'Account needed',
|
'skip': 'Account needed',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
|
_MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
|
||||||
|
@ -199,7 +199,7 @@ class PelotonLiveIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
'_skip': 'Account needed',
|
'skip': 'Account needed',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import int_or_none, join_nonempty
|
||||||
|
|
||||||
|
|
||||||
class PerformGroupIE(InfoExtractor):
|
class PerformGroupIE(InfoExtractor):
|
||||||
|
@ -50,11 +50,8 @@ def _real_extract(self, url):
|
||||||
if not c_url:
|
if not c_url:
|
||||||
continue
|
continue
|
||||||
tbr = int_or_none(c.get('bitrate'), 1000)
|
tbr = int_or_none(c.get('bitrate'), 1000)
|
||||||
format_id = 'http'
|
|
||||||
if tbr:
|
|
||||||
format_id += f'-{tbr}'
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': join_nonempty('http', tbr),
|
||||||
'url': c_url,
|
'url': c_url,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'width': int_or_none(c.get('width')),
|
'width': int_or_none(c.get('width')),
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
update_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,15 +44,16 @@ def _real_extract(self, url):
|
||||||
url
|
url
|
||||||
}
|
}
|
||||||
}''' % (channel_id, channel_id), # noqa: UP031
|
}''' % (channel_id, channel_id), # noqa: UP031
|
||||||
})['data']
|
}, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']
|
||||||
metadata = data['channel']
|
metadata = data['channel']
|
||||||
|
|
||||||
if metadata.get('online') == 0:
|
if metadata.get('online') == 0:
|
||||||
raise ExtractorError('Stream is offline', expected=True)
|
raise ExtractorError('Stream is offline', expected=True)
|
||||||
title = metadata['title']
|
title = metadata['title']
|
||||||
|
|
||||||
cdn_data = self._download_json(
|
cdn_data = self._download_json(''.join((
|
||||||
data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
|
update_url(data['getLoadBalancerUrl']['url'], scheme='https'),
|
||||||
|
'/stream/json_', metadata['stream_name'], '.js')),
|
||||||
channel_id, 'Downloading load balancing info')
|
channel_id, 'Downloading load balancing info')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -99,10 +101,10 @@ class PicartoVodIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'skip': 'The VOD does not exist',
|
'skip': 'The VOD does not exist',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://picarto.tv/ArtofZod/videos/772650',
|
'url': 'https://picarto.tv/ArtofZod/videos/771008',
|
||||||
'md5': '00067a0889f1f6869cc512e3e79c521b',
|
'md5': 'abef5322f2700d967720c4c6754b2a34',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '772650',
|
'id': '771008',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Art of Zod - Drawing and Painting',
|
'title': 'Art of Zod - Drawing and Painting',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
@ -131,7 +133,7 @@ def _real_extract(self, url):
|
||||||
}}
|
}}
|
||||||
}}
|
}}
|
||||||
}}''',
|
}}''',
|
||||||
})['data']['video']
|
}, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']['video']
|
||||||
|
|
||||||
file_name = data['file_name']
|
file_name = data['file_name']
|
||||||
netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc
|
netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class PokerGoBaseIE(InfoExtractor):
|
class PokerGoBaseIE(InfoExtractor):
|
||||||
|
@ -65,7 +66,7 @@ def _real_extract(self, url):
|
||||||
'width': image.get('width'),
|
'width': image.get('width'),
|
||||||
'height': image.get('height'),
|
'height': image.get('height'),
|
||||||
} for image in data_json.get('images') or [] if image.get('url')]
|
} for image in data_json.get('images') or [] if image.get('url')]
|
||||||
series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == video_id) or {}
|
series_json = traverse_obj(data_json, ('show_tags', lambda _, v: v['video_id'] == video_id, any)) or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
import functools
|
||||||
import json
|
import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import functools
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
@ -147,13 +148,13 @@ def fix_bitrate(bitrate):
|
||||||
'page_url': 'http://www.prosieben.de',
|
'page_url': 'http://www.prosieben.de',
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'format_id': 'rtmp{}'.format(f'-{tbr}' if tbr else ''),
|
'format_id': join_nonempty('rtmp', tbr),
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': source_url,
|
'url': source_url,
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
'format_id': 'http{}'.format(f'-{tbr}' if tbr else ''),
|
'format_id': join_nonempty('http', tbr),
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
@ -1,48 +1,125 @@
|
||||||
|
import base64
|
||||||
|
import functools
|
||||||
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
OnDemandPagedList,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
|
js_to_json,
|
||||||
|
str_or_none,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
|
traverse_obj,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class QQMusicIE(InfoExtractor):
|
class QQMusicBaseIE(InfoExtractor):
|
||||||
|
def _get_cookie(self, key, default=None):
|
||||||
|
return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default)
|
||||||
|
|
||||||
|
def _get_g_tk(self):
|
||||||
|
n = 5381
|
||||||
|
for c in self._get_cookie('qqmusic_key', ''):
|
||||||
|
n += (n << 5) + ord(c)
|
||||||
|
return n & 2147483647
|
||||||
|
|
||||||
|
def _get_uin(self):
|
||||||
|
return int_or_none(self._get_cookie('uin')) or 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_logged_in(self):
|
||||||
|
return bool(self._get_uin() and self._get_cookie('fqm_pvqid'))
|
||||||
|
|
||||||
|
# Reference: m_r_GetRUin() in top_player.js
|
||||||
|
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
|
||||||
|
@staticmethod
|
||||||
|
def _m_r_get_ruin():
|
||||||
|
cur_ms = int(time.time() * 1000) % 1000
|
||||||
|
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
|
||||||
|
|
||||||
|
def _download_init_data(self, url, mid, fatal=True):
|
||||||
|
webpage = self._download_webpage(url, mid, fatal=fatal)
|
||||||
|
return self._search_json(r'window\.__INITIAL_DATA__\s*=', webpage,
|
||||||
|
'init data', mid, transform_source=js_to_json, fatal=fatal)
|
||||||
|
|
||||||
|
def _make_fcu_req(self, req_dict, mid, headers={}, **kwargs):
|
||||||
|
return self._download_json(
|
||||||
|
'https://u.y.qq.com/cgi-bin/musicu.fcg', mid, data=json.dumps({
|
||||||
|
'comm': {
|
||||||
|
'cv': 0,
|
||||||
|
'ct': 24,
|
||||||
|
'format': 'json',
|
||||||
|
'uin': self._get_uin(),
|
||||||
|
},
|
||||||
|
**req_dict,
|
||||||
|
}, separators=(',', ':')).encode(), headers=headers, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class QQMusicIE(QQMusicBaseIE):
|
||||||
IE_NAME = 'qqmusic'
|
IE_NAME = 'qqmusic'
|
||||||
IE_DESC = 'QQ音乐'
|
IE_DESC = 'QQ音乐'
|
||||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html'
|
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/songDetail/(?P<id>[0-9A-Za-z]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html',
|
'url': 'https://y.qq.com/n/ryqq/songDetail/004Ti8rT003TaZ',
|
||||||
|
'md5': 'd7adc5c438d12e2cb648cca81593fd47',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '004Ti8rT003TaZ',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '永夜のパレード (永夜的游行)',
|
||||||
|
'album': '幻想遊園郷 -Fantastic Park-',
|
||||||
|
'release_date': '20111230',
|
||||||
|
'duration': 281,
|
||||||
|
'creators': ['ケーキ姫', 'JUMA'],
|
||||||
|
'genres': ['Pop'],
|
||||||
|
'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9',
|
||||||
|
'size': 4501244,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
|
'subtitles': 'count:1',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://y.qq.com/n/ryqq/songDetail/004295Et37taLD',
|
||||||
'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
|
'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '004295Et37taLD',
|
'id': '004295Et37taLD',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '可惜没如果',
|
'title': '可惜没如果',
|
||||||
'release_date': '20141227',
|
'album': '新地球 - 人 (Special Edition)',
|
||||||
'creator': '林俊杰',
|
'release_date': '20150129',
|
||||||
'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac',
|
'duration': 298,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'creators': ['林俊杰'],
|
||||||
|
'genres': ['Pop'],
|
||||||
|
'description': 'md5:f568421ff618d2066e74b65a04149c4e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
},
|
},
|
||||||
|
'skip': 'premium member only',
|
||||||
}, {
|
}, {
|
||||||
'note': 'There is no mp3-320 version of this song.',
|
'note': 'There is no mp3-320 version of this song.',
|
||||||
'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html',
|
'url': 'https://y.qq.com/n/ryqq/songDetail/004MsGEo3DdNxV',
|
||||||
'md5': 'fa3926f0c585cda0af8fa4f796482e3e',
|
'md5': '028aaef1ae13d8a9f4861a92614887f9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '004MsGEo3DdNxV',
|
'id': '004MsGEo3DdNxV',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '如果',
|
'title': '如果',
|
||||||
|
'album': '新传媒电视连续剧金曲系列II',
|
||||||
'release_date': '20050626',
|
'release_date': '20050626',
|
||||||
'creator': '李季美',
|
'duration': 220,
|
||||||
'description': 'md5:46857d5ed62bc4ba84607a805dccf437',
|
'creators': ['李季美'],
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'genres': [],
|
||||||
|
'description': 'md5:fc711212aa623b28534954dc4bd67385',
|
||||||
|
'size': 3535730,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'lyrics not in .lrc format',
|
'note': 'lyrics not in .lrc format',
|
||||||
'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html',
|
'url': 'https://y.qq.com/n/ryqq/songDetail/001JyApY11tIp6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '001JyApY11tIp6',
|
'id': '001JyApY11tIp6',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
|
@ -50,185 +127,193 @@ class QQMusicIE(InfoExtractor):
|
||||||
'release_date': '19970225',
|
'release_date': '19970225',
|
||||||
'creator': 'Dark Funeral',
|
'creator': 'Dark Funeral',
|
||||||
'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
|
'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
|
'skip': 'no longer available',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_FORMATS = {
|
_FORMATS = {
|
||||||
'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
|
'F000': {'name': 'flac', 'prefix': 'F000', 'ext': 'flac', 'preference': 60},
|
||||||
'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
|
'A000': {'name': 'ape', 'prefix': 'A000', 'ext': 'ape', 'preference': 50},
|
||||||
'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10},
|
'M800': {'name': '320mp3', 'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
|
||||||
|
'M500': {'name': '128mp3', 'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
|
||||||
|
'C400': {'name': '96aac', 'prefix': 'C400', 'ext': 'm4a', 'preference': 20, 'abr': 96},
|
||||||
|
'C200': {'name': '48aac', 'prefix': 'C200', 'ext': 'm4a', 'preference': 20, 'abr': 48},
|
||||||
}
|
}
|
||||||
|
|
||||||
# Reference: m_r_GetRUin() in top_player.js
|
|
||||||
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
|
|
||||||
@staticmethod
|
|
||||||
def m_r_get_ruin():
|
|
||||||
cur_ms = int(time.time() * 1000) % 1000
|
|
||||||
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mid = self._match_id(url)
|
mid = self._match_id(url)
|
||||||
|
|
||||||
detail_info_page = self._download_webpage(
|
init_data = self._download_init_data(url, mid, fatal=False)
|
||||||
f'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid={mid}&play=0',
|
info_data = self._make_fcu_req({'info': {
|
||||||
mid, note='Download song detail info',
|
'module': 'music.pf_song_detail_svr',
|
||||||
errnote='Unable to get song detail info', encoding='gbk')
|
'method': 'get_song_detail_yqq',
|
||||||
|
'param': {
|
||||||
|
'song_mid': mid,
|
||||||
|
'song_type': 0,
|
||||||
|
},
|
||||||
|
}}, mid, note='Downloading song info')['info']['data']['track_info']
|
||||||
|
|
||||||
song_name = self._html_search_regex(
|
media_mid = info_data['file']['media_mid']
|
||||||
r"songname:\s*'([^']+)'", detail_info_page, 'song name')
|
|
||||||
|
|
||||||
publish_time = self._html_search_regex(
|
data = self._make_fcu_req({
|
||||||
r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page,
|
'req_1': {
|
||||||
'publish time', default=None)
|
'module': 'vkey.GetVkeyServer',
|
||||||
if publish_time:
|
'method': 'CgiGetVkey',
|
||||||
publish_time = publish_time.replace('-', '')
|
'param': {
|
||||||
|
'guid': str(self._m_r_get_ruin()),
|
||||||
singer = self._html_search_regex(
|
'songmid': [mid] * len(self._FORMATS),
|
||||||
r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None)
|
'songtype': [0] * len(self._FORMATS),
|
||||||
|
'uin': str(self._get_uin()),
|
||||||
lrc_content = self._html_search_regex(
|
'loginflag': 1,
|
||||||
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>',
|
'platform': '20',
|
||||||
detail_info_page, 'LRC lyrics', default=None)
|
'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()],
|
||||||
if lrc_content:
|
},
|
||||||
lrc_content = lrc_content.replace('\\n', '\n')
|
},
|
||||||
|
'req_2': {
|
||||||
thumbnail_url = None
|
'module': 'music.musichallSong.PlayLyricInfo',
|
||||||
albummid = self._search_regex(
|
'method': 'GetPlayLyricInfo',
|
||||||
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'],
|
'param': {'songMID': mid},
|
||||||
detail_info_page, 'album mid', default=None)
|
},
|
||||||
if albummid:
|
}, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers())
|
||||||
thumbnail_url = f'http://i.gtimg.cn/music/photo/mid_album_500/{albummid[-2:-1]}/{albummid[-1]}/{albummid}.jpg'
|
|
||||||
|
|
||||||
guid = self.m_r_get_ruin()
|
|
||||||
|
|
||||||
vkey = self._download_json(
|
|
||||||
f'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid={guid}',
|
|
||||||
mid, note='Retrieve vkey', errnote='Unable to get vkey',
|
|
||||||
transform_source=strip_jsonp)['key']
|
|
||||||
|
|
||||||
|
code = traverse_obj(data, ('req_1', 'code', {int}))
|
||||||
|
if code != 0:
|
||||||
|
raise ExtractorError(f'Failed to download format info, error code {code or "unknown"}')
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, details in self._FORMATS.items():
|
for media_info in traverse_obj(data, (
|
||||||
|
'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']),
|
||||||
|
):
|
||||||
|
format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]}))
|
||||||
|
format_info = self._FORMATS.get(format_key) or {}
|
||||||
|
format_id = format_info.get('name')
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': 'http://cc.stream.qqmusic.qq.com/{}{}.{}?vkey={}&guid={}&fromtag=0'.format(
|
'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']),
|
||||||
details['prefix'], mid, details['ext'], vkey, guid),
|
|
||||||
'format': format_id,
|
'format': format_id,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': details['preference'],
|
'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})),
|
||||||
'abr': details.get('abr'),
|
'quality': format_info.get('preference'),
|
||||||
|
'abr': format_info.get('abr'),
|
||||||
|
'ext': format_info.get('ext'),
|
||||||
|
'vcodec': 'none',
|
||||||
})
|
})
|
||||||
self._check_formats(formats, mid)
|
|
||||||
|
|
||||||
actual_lrc_lyrics = ''.join(
|
if not formats and not self.is_logged_in:
|
||||||
line + '\n' for line in re.findall(
|
self.raise_login_required()
|
||||||
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
|
|
||||||
|
if traverse_obj(data, ('req_2', 'code')):
|
||||||
|
self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}')
|
||||||
|
lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')}))
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': mid,
|
'id': mid,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': song_name,
|
**traverse_obj(info_data, {
|
||||||
'release_date': publish_time,
|
'title': ('title', {str}),
|
||||||
'creator': singer,
|
'album': ('album', 'title', {str}, {lambda x: x or None}),
|
||||||
'description': lrc_content,
|
'release_date': ('time_public', {lambda x: x.replace('-', '') or None}),
|
||||||
'thumbnail': thumbnail_url,
|
'creators': ('singer', ..., 'name', {str}),
|
||||||
}
|
'alt_title': ('subtitle', {str}, {lambda x: x or None}),
|
||||||
if actual_lrc_lyrics:
|
'duration': ('interval', {int_or_none}),
|
||||||
info_dict['subtitles'] = {
|
}),
|
||||||
'origin': [{
|
**traverse_obj(init_data, ('detail', {
|
||||||
'ext': 'lrc',
|
'thumbnail': ('picurl', {url_or_none}),
|
||||||
'data': actual_lrc_lyrics,
|
'description': ('info', 'intro', 'content', ..., 'value', {str}),
|
||||||
}],
|
'genres': ('info', 'genre', 'content', ..., 'value', {str}, all),
|
||||||
|
}), get_all=False),
|
||||||
}
|
}
|
||||||
|
if lrc_content:
|
||||||
|
info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]}
|
||||||
|
info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n')
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
class QQPlaylistBaseIE(InfoExtractor):
|
class QQMusicSingerIE(QQMusicBaseIE):
|
||||||
@staticmethod
|
|
||||||
def qq_static_url(category, mid):
|
|
||||||
return f'http://y.qq.com/y/static/{category}/{mid[-2]}/{mid[-1]}/{mid}.html'
|
|
||||||
|
|
||||||
def get_singer_all_songs(self, singmid, num):
|
|
||||||
return self._download_webpage(
|
|
||||||
r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
|
|
||||||
query={
|
|
||||||
'format': 'json',
|
|
||||||
'inCharset': 'utf8',
|
|
||||||
'outCharset': 'utf-8',
|
|
||||||
'platform': 'yqq',
|
|
||||||
'needNewCode': 0,
|
|
||||||
'singermid': singmid,
|
|
||||||
'order': 'listen',
|
|
||||||
'begin': 0,
|
|
||||||
'num': num,
|
|
||||||
'songstatus': 1,
|
|
||||||
})
|
|
||||||
|
|
||||||
def get_entries_from_page(self, singmid):
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
default_num = 1
|
|
||||||
json_text = self.get_singer_all_songs(singmid, default_num)
|
|
||||||
json_obj_all_songs = self._parse_json(json_text, singmid)
|
|
||||||
|
|
||||||
if json_obj_all_songs['code'] == 0:
|
|
||||||
total = json_obj_all_songs['data']['total']
|
|
||||||
json_text = self.get_singer_all_songs(singmid, total)
|
|
||||||
json_obj_all_songs = self._parse_json(json_text, singmid)
|
|
||||||
|
|
||||||
for item in json_obj_all_songs['data']['list']:
|
|
||||||
if item['musicData'].get('songmid') is not None:
|
|
||||||
songmid = item['musicData']['songmid']
|
|
||||||
entries.append(self.url_result(
|
|
||||||
rf'https://y.qq.com/n/yqq/song/{songmid}.html', 'QQMusic', songmid))
|
|
||||||
|
|
||||||
return entries
|
|
||||||
|
|
||||||
|
|
||||||
class QQMusicSingerIE(QQPlaylistBaseIE):
|
|
||||||
IE_NAME = 'qqmusic:singer'
|
IE_NAME = 'qqmusic:singer'
|
||||||
IE_DESC = 'QQ音乐 - 歌手'
|
IE_DESC = 'QQ音乐 - 歌手'
|
||||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html'
|
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html',
|
'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '001BLpXF2DyJe2',
|
'id': '001BLpXF2DyJe2',
|
||||||
'title': '林俊杰',
|
'title': '林俊杰',
|
||||||
'description': 'md5:870ec08f7d8547c29c93010899103751',
|
'description': 'md5:10624ce73b06fa400bc846f59b0305fa',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 12,
|
'playlist_mincount': 100,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '000Q00f213YzNV',
|
||||||
|
'title': '桃几OvO',
|
||||||
|
'description': '小破站小唱见~希望大家喜欢听我唱歌~!',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
|
},
|
||||||
|
'playlist_count': 12,
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0016cvsy02mmCl',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '群青',
|
||||||
|
'album': '桃几2021年翻唱集',
|
||||||
|
'release_date': '20210913',
|
||||||
|
'duration': 248,
|
||||||
|
'creators': ['桃几OvO'],
|
||||||
|
'genres': ['Pop'],
|
||||||
|
'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae',
|
||||||
|
'size': 3970822,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}]
|
||||||
|
|
||||||
|
_PAGE_SIZE = 50
|
||||||
|
|
||||||
|
def _fetch_page(self, mid, page_size, page_num):
|
||||||
|
data = self._make_fcu_req({'req_1': {
|
||||||
|
'module': 'music.web_singer_info_svr',
|
||||||
|
'method': 'get_singer_detail_info',
|
||||||
|
'param': {
|
||||||
|
'sort': 5,
|
||||||
|
'singermid': mid,
|
||||||
|
'sin': page_num * page_size,
|
||||||
|
'num': page_size,
|
||||||
|
}}}, mid, note=f'Downloading page {page_num}')
|
||||||
|
yield from traverse_obj(data, ('req_1', 'data', 'songlist', ..., {lambda x: self.url_result(
|
||||||
|
f'https://y.qq.com/n/ryqq/songDetail/{x["mid"]}', QQMusicIE, x['mid'], x.get('title'))}))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mid = self._match_id(url)
|
mid = self._match_id(url)
|
||||||
|
init_data = self._download_init_data(url, mid, fatal=False)
|
||||||
|
|
||||||
entries = self.get_entries_from_page(mid)
|
return self.playlist_result(
|
||||||
singer_page = self._download_webpage(url, mid, 'Download singer page')
|
OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE),
|
||||||
singer_name = self._html_search_regex(
|
mid, **traverse_obj(init_data, ('singerDetail', {
|
||||||
r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None)
|
'title': ('basic_info', 'name', {str}),
|
||||||
singer_desc = None
|
'description': ('ex_info', 'desc', {str}),
|
||||||
|
'thumbnail': ('pic', 'pic', {url_or_none}),
|
||||||
|
})))
|
||||||
|
|
||||||
if mid:
|
|
||||||
singer_desc_page = self._download_xml(
|
|
||||||
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
|
|
||||||
'Donwload singer description XML',
|
|
||||||
query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
|
|
||||||
headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
|
|
||||||
|
|
||||||
singer_desc = singer_desc_page.find('./data/info/desc').text
|
class QQPlaylistBaseIE(InfoExtractor):
|
||||||
|
def _extract_entries(self, info_json, path):
|
||||||
return self.playlist_result(entries, mid, singer_name, singer_desc)
|
for song in traverse_obj(info_json, path):
|
||||||
|
song_mid = song.get('songmid')
|
||||||
|
if not song_mid:
|
||||||
|
continue
|
||||||
|
yield self.url_result(
|
||||||
|
f'https://y.qq.com/n/ryqq/songDetail/{song_mid}',
|
||||||
|
QQMusicIE, song_mid, song.get('songname'))
|
||||||
|
|
||||||
|
|
||||||
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||||
IE_NAME = 'qqmusic:album'
|
IE_NAME = 'qqmusic:album'
|
||||||
IE_DESC = 'QQ音乐 - 专辑'
|
IE_DESC = 'QQ音乐 - 专辑'
|
||||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
|
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
|
'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '000gXCTb2AhRR1',
|
'id': '000gXCTb2AhRR1',
|
||||||
'title': '我们都是这样长大的',
|
'title': '我们都是这样长大的',
|
||||||
|
@ -236,10 +321,10 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 4,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
|
'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '002Y5a3b3AlCu3',
|
'id': '002Y5a3b3AlCu3',
|
||||||
'title': '그리고...',
|
'title': '그리고…',
|
||||||
'description': 'md5:a48823755615508a95080e81b51ba729',
|
'description': 'md5:a48823755615508a95080e81b51ba729',
|
||||||
},
|
},
|
||||||
'playlist_count': 8,
|
'playlist_count': 8,
|
||||||
|
@ -248,49 +333,45 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mid = self._match_id(url)
|
mid = self._match_id(url)
|
||||||
|
|
||||||
album = self._download_json(
|
album_json = self._download_json(
|
||||||
f'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid={mid}&format=json',
|
'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg',
|
||||||
mid, 'Download album page')['data']
|
mid, 'Download album page',
|
||||||
|
query={'albummid': mid, 'format': 'json'})['data']
|
||||||
|
|
||||||
entries = [
|
entries = self._extract_entries(album_json, ('list', ...))
|
||||||
self.url_result(
|
|
||||||
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'],
|
|
||||||
) for song in album['list']
|
|
||||||
]
|
|
||||||
album_name = album.get('name')
|
|
||||||
album_detail = album.get('desc')
|
|
||||||
if album_detail is not None:
|
|
||||||
album_detail = album_detail.strip()
|
|
||||||
|
|
||||||
return self.playlist_result(entries, mid, album_name, album_detail)
|
return self.playlist_result(entries, mid, **traverse_obj(album_json, {
|
||||||
|
'title': ('name', {str}),
|
||||||
|
'description': ('desc', {str.strip}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
|
||||||
class QQMusicToplistIE(QQPlaylistBaseIE):
|
class QQMusicToplistIE(QQPlaylistBaseIE):
|
||||||
IE_NAME = 'qqmusic:toplist'
|
IE_NAME = 'qqmusic:toplist'
|
||||||
IE_DESC = 'QQ音乐 - 排行榜'
|
IE_DESC = 'QQ音乐 - 排行榜'
|
||||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
|
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://y.qq.com/n/yqq/toplist/123.html',
|
'url': 'https://y.qq.com/n/ryqq/toplist/123',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '123',
|
'id': '123',
|
||||||
'title': '美国iTunes榜',
|
'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}',
|
||||||
'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
|
'description': '美国热门音乐榜,每周一更新。',
|
||||||
},
|
},
|
||||||
'playlist_count': 100,
|
'playlist_count': 95,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://y.qq.com/n/yqq/toplist/3.html',
|
'url': 'https://y.qq.com/n/ryqq/toplist/3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3',
|
'id': '3',
|
||||||
'title': '巅峰榜·欧美',
|
'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}',
|
||||||
'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
|
'description': 'md5:4def03b60d3644be4c9a36f21fd33857',
|
||||||
},
|
},
|
||||||
'playlist_count': 100,
|
'playlist_count': 100,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://y.qq.com/n/yqq/toplist/106.html',
|
'url': 'https://y.qq.com/n/ryqq/toplist/106',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '106',
|
'id': '106',
|
||||||
'title': '韩国Mnet榜',
|
'title': r're:韩国Mnet榜 \d{4}-\d{2}-\d{2}',
|
||||||
'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
|
'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
|
||||||
},
|
},
|
||||||
'playlist_count': 50,
|
'playlist_count': 50,
|
||||||
|
@ -304,33 +385,20 @@ def _real_extract(self, url):
|
||||||
note='Download toplist page',
|
note='Download toplist page',
|
||||||
query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
|
query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
|
||||||
|
|
||||||
entries = [self.url_result(
|
return self.playlist_result(
|
||||||
'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
|
self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id,
|
||||||
song['data']['songmid'])
|
playlist_title=join_nonempty(*traverse_obj(
|
||||||
for song in toplist_json['songlist']]
|
toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '),
|
||||||
|
playlist_description=traverse_obj(toplist_json, ('topinfo', 'info')))
|
||||||
topinfo = toplist_json.get('topinfo', {})
|
|
||||||
list_name = topinfo.get('ListName')
|
|
||||||
list_description = topinfo.get('info')
|
|
||||||
return self.playlist_result(entries, list_id, list_name, list_description)
|
|
||||||
|
|
||||||
|
|
||||||
class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||||
IE_NAME = 'qqmusic:playlist'
|
IE_NAME = 'qqmusic:playlist'
|
||||||
IE_DESC = 'QQ音乐 - 歌单'
|
IE_DESC = 'QQ音乐 - 歌单'
|
||||||
_VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
|
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
|
'url': 'https://y.qq.com/n/ryqq/playlist/1374105607',
|
||||||
'info_dict': {
|
|
||||||
'id': '3462654915',
|
|
||||||
'title': '韩国5月新歌精选下旬',
|
|
||||||
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
|
|
||||||
},
|
|
||||||
'playlist_count': 40,
|
|
||||||
'skip': 'playlist gone',
|
|
||||||
}, {
|
|
||||||
'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1374105607',
|
'id': '1374105607',
|
||||||
'title': '易入人心的华语民谣',
|
'title': '易入人心的华语民谣',
|
||||||
|
@ -346,19 +414,83 @@ def _real_extract(self, url):
|
||||||
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
|
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
|
||||||
list_id, 'Download list page',
|
list_id, 'Download list page',
|
||||||
query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
|
query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
|
||||||
transform_source=strip_jsonp)
|
transform_source=strip_jsonp, headers={'Referer': url})
|
||||||
if not len(list_json.get('cdlist', [])):
|
if not len(list_json.get('cdlist', [])):
|
||||||
if list_json.get('code'):
|
raise ExtractorError(join_nonempty(
|
||||||
raise ExtractorError(
|
'Unable to get playlist info',
|
||||||
'QQ Music said: error %d in fetching playlist info' % list_json['code'],
|
join_nonempty('code', 'subcode', from_dict=list_json),
|
||||||
expected=True)
|
list_json.get('msg'), delim=': '))
|
||||||
raise ExtractorError('Unable to get playlist info')
|
|
||||||
|
|
||||||
cdlist = list_json['cdlist'][0]
|
entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...))
|
||||||
entries = [self.url_result(
|
|
||||||
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
|
|
||||||
for song in cdlist['songlist']]
|
|
||||||
|
|
||||||
list_name = cdlist.get('dissname')
|
return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, {
|
||||||
list_description = clean_html(unescapeHTML(cdlist.get('desc')))
|
'title': ('dissname', {str}),
|
||||||
return self.playlist_result(entries, list_id, list_name, list_description)
|
'description': ('desc', {unescapeHTML}, {clean_html}),
|
||||||
|
})))
|
||||||
|
|
||||||
|
|
||||||
|
class QQMusicVideoIE(QQMusicBaseIE):
|
||||||
|
IE_NAME = 'qqmusic:mv'
|
||||||
|
IE_DESC = 'QQ音乐 - MV'
|
||||||
|
_VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '002Vsarh3SVU8K',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Chant (Extended Mix / Audio)',
|
||||||
|
'description': '',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
|
||||||
|
'release_timestamp': 1688918400,
|
||||||
|
'release_date': '20230709',
|
||||||
|
'duration': 313,
|
||||||
|
'creators': ['Duke Dumont'],
|
||||||
|
'view_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _parse_url_formats(self, url_data):
|
||||||
|
return traverse_obj(url_data, ('mp4', lambda _, v: v['freeflow_url'], {
|
||||||
|
'url': ('freeflow_url', 0, {url_or_none}),
|
||||||
|
'filesize': ('fileSize', {int_or_none}),
|
||||||
|
'format_id': ('newFileType', {str_or_none}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video_info = self._make_fcu_req({
|
||||||
|
'mvInfo': {
|
||||||
|
'module': 'music.video.VideoData',
|
||||||
|
'method': 'get_video_info_batch',
|
||||||
|
'param': {
|
||||||
|
'vidlist': [video_id],
|
||||||
|
'required': [
|
||||||
|
'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers',
|
||||||
|
'video_pay', 'hint', 'code', 'msg', 'name', 'desc',
|
||||||
|
'playcnt', 'pubdate', 'play_forbid_reason'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'mvUrl': {
|
||||||
|
'module': 'music.stream.MvUrlProxy',
|
||||||
|
'method': 'GetMvUrls',
|
||||||
|
'param': {'vids': [video_id]},
|
||||||
|
},
|
||||||
|
}, video_id, headers=self.geo_verification_headers())
|
||||||
|
if traverse_obj(video_info, ('mvInfo', 'data', video_id, 'play_forbid_reason')) == 3:
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': self._parse_url_formats(traverse_obj(video_info, ('mvUrl', 'data', video_id))),
|
||||||
|
**traverse_obj(video_info, ('mvInfo', 'data', video_id, {
|
||||||
|
'title': ('name', {str}),
|
||||||
|
'description': ('desc', {str}),
|
||||||
|
'thumbnail': ('cover_pic', {url_or_none}),
|
||||||
|
'release_timestamp': ('pubdate', {int_or_none}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'creators': ('singers', ..., 'name', {str}),
|
||||||
|
'view_count': ('playcnt', {int_or_none}),
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
|
|
@ -314,23 +314,11 @@ def add_format(f, protocol, is_preview=False):
|
||||||
self.write_debug(f'"{identifier}" is not a requested format, skipping')
|
self.write_debug(f'"{identifier}" is not a requested format, skipping')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
stream = None
|
# XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called
|
||||||
for retry in self.RetryManager(fatal=False):
|
stream_url = traverse_obj(self._call_api(
|
||||||
try:
|
|
||||||
stream = self._call_api(
|
|
||||||
format_url, track_id, f'Downloading {identifier} format info JSON',
|
format_url, track_id, f'Downloading {identifier} format info JSON',
|
||||||
query=query, headers=self._HEADERS)
|
query=query, headers=self._HEADERS), ('url', {url_or_none}))
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
|
|
||||||
self.report_warning(
|
|
||||||
'You have reached the API rate limit, which is ~600 requests per '
|
|
||||||
'10 minutes. Use the --extractor-retries and --retry-sleep options '
|
|
||||||
'to configure an appropriate retry count and wait time', only_once=True)
|
|
||||||
retry.error = e.cause
|
|
||||||
else:
|
|
||||||
self.report_warning(e.msg)
|
|
||||||
|
|
||||||
stream_url = traverse_obj(stream, ('url', {url_or_none}))
|
|
||||||
if invalid_url(stream_url):
|
if invalid_url(stream_url):
|
||||||
continue
|
continue
|
||||||
format_urls.add(stream_url)
|
format_urls.add(stream_url)
|
||||||
|
@ -647,7 +635,17 @@ def _real_extract(self, url):
|
||||||
info = self._call_api(
|
info = self._call_api(
|
||||||
info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
|
info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
|
||||||
|
|
||||||
|
for retry in self.RetryManager():
|
||||||
|
try:
|
||||||
return self._extract_info_dict(info, full_title, token)
|
return self._extract_info_dict(info, full_title, token)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||||
|
raise
|
||||||
|
self.report_warning(
|
||||||
|
'You have reached the API rate limit, which is ~600 requests per '
|
||||||
|
'10 minutes. Use the --extractor-retries and --retry-sleep options '
|
||||||
|
'to configure an appropriate retry count and wait time', only_once=True)
|
||||||
|
retry.error = e.cause
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
|
class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
|
||||||
|
@ -873,7 +871,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
|
||||||
'id': '30909869',
|
'id': '30909869',
|
||||||
'title': 'neilcic',
|
'title': 'neilcic',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 23,
|
'playlist_mincount': 22,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -882,7 +880,7 @@ def _real_extract(self, url):
|
||||||
self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
|
self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
|
||||||
|
|
||||||
return self._extract_playlist(
|
return self._extract_playlist(
|
||||||
f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username'))
|
f'{self._API_V2_BASE}users/{user["id"]}/tracks', str(user['id']), user.get('username'))
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
|
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
|
||||||
|
|
|
@ -1,55 +1,31 @@
|
||||||
from .common import InfoExtractor
|
from .vidyard import VidyardBaseIE
|
||||||
from ..utils import ExtractorError, int_or_none, traverse_obj
|
from ..utils import ExtractorError, int_or_none, make_archive_id
|
||||||
|
|
||||||
|
|
||||||
class SwearnetEpisodeIE(InfoExtractor):
|
class SwearnetEpisodeIE(VidyardBaseIE):
|
||||||
_VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
|
_VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
|
'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '232819',
|
'id': 'wicK2EOzjOdxkUXGDIgcPw',
|
||||||
|
'display_id': '232819',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'episode': 'Episode 1',
|
'episode': 'Episode 1',
|
||||||
'duration': 719,
|
'duration': 719,
|
||||||
'description': 'md5:c48ef71440ce466284c07085cd7bd761',
|
'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+',
|
||||||
'season': 'Season 1',
|
'season': 'Season 1',
|
||||||
'title': 'Episode 1 - Grilled Cheese Sammich',
|
'title': 'Episode 1 - Grilled Cheese Sammich',
|
||||||
'season_number': 1,
|
'season_number': 1,
|
||||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg',
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg',
|
||||||
|
'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'],
|
||||||
|
'_old_archive_ids': ['swearnetepisode 232819'],
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _get_formats_and_subtitle(self, video_source, video_id):
|
|
||||||
video_source = video_source or {}
|
|
||||||
formats, subtitles = [], {}
|
|
||||||
for key, value in video_source.items():
|
|
||||||
if key == 'hls':
|
|
||||||
for video_hls in value:
|
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id)
|
|
||||||
formats.extend(fmts)
|
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
|
||||||
else:
|
|
||||||
formats.extend({
|
|
||||||
'url': video_mp4.get('url'),
|
|
||||||
'ext': 'mp4',
|
|
||||||
} for video_mp4 in value)
|
|
||||||
|
|
||||||
return formats, subtitles
|
|
||||||
|
|
||||||
def _get_direct_subtitle(self, caption_json):
|
|
||||||
subs = {}
|
|
||||||
for caption in caption_json:
|
|
||||||
subs.setdefault(caption.get('language') or 'und', []).append({
|
|
||||||
'url': caption.get('vttUrl'),
|
|
||||||
'name': caption.get('name'),
|
|
||||||
})
|
|
||||||
|
|
||||||
return subs
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
slug, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, slug)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
|
||||||
|
@ -58,22 +34,12 @@ def _real_extract(self, url):
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
json_data = self._download_json(
|
info = self._process_video_json(self._fetch_video_json(external_id)['chapters'][0], external_id)
|
||||||
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
|
if info.get('display_id'):
|
||||||
|
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
|
||||||
formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id)
|
|
||||||
self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': str(json_data['videoId']),
|
**info,
|
||||||
'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
|
||||||
'description': (json_data.get('description')
|
|
||||||
or self._html_search_meta(['og:description', 'twitter:description'], webpage)),
|
|
||||||
'duration': int_or_none(json_data.get('seconds')),
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
'season_number': int_or_none(season_number),
|
'season_number': int_or_none(season_number),
|
||||||
'episode_number': int_or_none(episode_number),
|
'episode_number': int_or_none(episode_number),
|
||||||
'thumbnails': [{'url': thumbnail_url}
|
|
||||||
for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))],
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,13 +23,13 @@
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,8 +43,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'iid': None,
|
'iid': None,
|
||||||
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
|
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
|
||||||
'app_name': 'musical_ly',
|
'app_name': 'musical_ly',
|
||||||
'app_version': '34.1.2',
|
'app_version': '35.1.3',
|
||||||
'manifest_app_version': '2023401020',
|
'manifest_app_version': '2023501030',
|
||||||
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||||
'aid': '0',
|
'aid': '0',
|
||||||
}
|
}
|
||||||
|
@ -114,7 +114,7 @@ def _get_universal_data(self, webpage, display_id):
|
||||||
'universal data', display_id, end_pattern=r'</script>', default={}),
|
'universal data', display_id, end_pattern=r'</script>', default={}),
|
||||||
('__DEFAULT_SCOPE__', {dict})) or {}
|
('__DEFAULT_SCOPE__', {dict})) or {}
|
||||||
|
|
||||||
def _call_api_impl(self, ep, query, video_id, fatal=True,
|
def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
||||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||||
|
@ -125,7 +125,8 @@ def _call_api_impl(self, ep, query, video_id, fatal=True,
|
||||||
fatal=fatal, note=note, errnote=errnote, headers={
|
fatal=fatal, note=note, errnote=errnote, headers={
|
||||||
'User-Agent': self._APP_USER_AGENT,
|
'User-Agent': self._APP_USER_AGENT,
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
}, query=query)
|
**(headers or {}),
|
||||||
|
}, query=query, data=data)
|
||||||
|
|
||||||
def _build_api_query(self, query):
|
def _build_api_query(self, query):
|
||||||
return filter_dict({
|
return filter_dict({
|
||||||
|
@ -174,7 +175,7 @@ def _build_api_query(self, query):
|
||||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||||
})
|
})
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True,
|
def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||||
if not self._APP_INFO and not self._get_next_app_info():
|
if not self._APP_INFO and not self._get_next_app_info():
|
||||||
message = 'No working app info is available'
|
message = 'No working app info is available'
|
||||||
|
@ -187,9 +188,11 @@ def _call_api(self, ep, query, video_id, fatal=True,
|
||||||
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
|
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
|
||||||
for count in itertools.count(1):
|
for count in itertools.count(1):
|
||||||
self.write_debug(str(self._APP_INFO))
|
self.write_debug(str(self._APP_INFO))
|
||||||
real_query = self._build_api_query(query)
|
real_query = self._build_api_query(query or {})
|
||||||
try:
|
try:
|
||||||
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
|
return self._call_api_impl(
|
||||||
|
ep, video_id, query=real_query, data=data, headers=headers,
|
||||||
|
fatal=fatal, note=note, errnote=errnote)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||||
message = str(e.cause or e.msg)
|
message = str(e.cause or e.msg)
|
||||||
|
@ -204,12 +207,13 @@ def _call_api(self, ep, query, video_id, fatal=True,
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _extract_aweme_app(self, aweme_id):
|
def _extract_aweme_app(self, aweme_id):
|
||||||
feed_list = self._call_api(
|
aweme_detail = traverse_obj(
|
||||||
'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed',
|
self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({
|
||||||
errnote='Unable to download video feed').get('aweme_list') or []
|
'aweme_ids': f'[{aweme_id}]',
|
||||||
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
|
'request_source': '0',
|
||||||
|
}), headers={'X-Argus': ''}), ('aweme_details', 0, {dict}))
|
||||||
if not aweme_detail:
|
if not aweme_detail:
|
||||||
raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
|
raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id)
|
||||||
return self._parse_aweme_video_app(aweme_detail)
|
return self._parse_aweme_video_app(aweme_detail)
|
||||||
|
|
||||||
def _extract_web_data_and_status(self, url, video_id, fatal=True):
|
def _extract_web_data_and_status(self, url, video_id, fatal=True):
|
||||||
|
@ -249,7 +253,16 @@ def _extract_web_data_and_status(self, url, video_id, fatal=True):
|
||||||
|
|
||||||
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||||
# TODO: Extract text positioning info
|
# TODO: Extract text positioning info
|
||||||
|
|
||||||
|
EXT_MAP = { # From lowest to highest preference
|
||||||
|
'creator_caption': 'json',
|
||||||
|
'srt': 'srt',
|
||||||
|
'webvtt': 'vtt',
|
||||||
|
}
|
||||||
|
preference = qualities(tuple(EXT_MAP.values()))
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
|
||||||
# aweme/detail endpoint subs
|
# aweme/detail endpoint subs
|
||||||
captions_info = traverse_obj(
|
captions_info = traverse_obj(
|
||||||
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
|
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
|
||||||
|
@ -273,8 +286,8 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||||
if not caption.get('url'):
|
if not caption.get('url'):
|
||||||
continue
|
continue
|
||||||
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
||||||
'ext': remove_start(caption.get('caption_format'), 'web'),
|
|
||||||
'url': caption['url'],
|
'url': caption['url'],
|
||||||
|
'ext': EXT_MAP.get(caption.get('Format')),
|
||||||
})
|
})
|
||||||
# webpage subs
|
# webpage subs
|
||||||
if not subtitles:
|
if not subtitles:
|
||||||
|
@ -283,9 +296,14 @@ def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||||
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
|
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
|
||||||
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
|
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
|
||||||
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
||||||
'ext': remove_start(caption.get('Format'), 'web'),
|
|
||||||
'url': caption['Url'],
|
'url': caption['Url'],
|
||||||
|
'ext': EXT_MAP.get(caption.get('Format')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Deprioritize creator_caption json since it can't be embedded or used by media players
|
||||||
|
for lang, subs_list in subtitles.items():
|
||||||
|
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
|
||||||
|
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _parse_url_key(self, url_key):
|
def _parse_url_key(self, url_key):
|
||||||
|
@ -1037,7 +1055,8 @@ def _entries(self, list_id, display_id):
|
||||||
for retry in self.RetryManager():
|
for retry in self.RetryManager():
|
||||||
try:
|
try:
|
||||||
post_list = self._call_api(
|
post_list = self._call_api(
|
||||||
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
|
self._API_ENDPOINT, display_id, query=query,
|
||||||
|
note=f'Downloading video list page {page}',
|
||||||
errnote='Unable to download video list')
|
errnote='Unable to download video list')
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||||
|
@ -1452,9 +1471,11 @@ def _real_extract(self, url):
|
||||||
|
|
||||||
if webpage:
|
if webpage:
|
||||||
data = self._get_sigi_state(webpage, uploader or room_id)
|
data = self._get_sigi_state(webpage, uploader or room_id)
|
||||||
room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
|
room_id = (
|
||||||
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
traverse_obj(data, ((
|
||||||
or room_id)
|
('LiveRoom', 'liveRoomUserInfo', 'user'),
|
||||||
|
('UserModule', 'users', ...)), 'roomId', {str}, any))
|
||||||
|
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id))
|
||||||
uploader = uploader or traverse_obj(
|
uploader = uploader or traverse_obj(
|
||||||
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
|
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
|
||||||
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
|
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
|
||||||
|
|
|
@ -28,35 +28,11 @@ class ToggleIE(InfoExtractor):
|
||||||
'skip_download': 'm3u8 download',
|
'skip_download': 'm3u8 download',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'note': 'DRM-protected video',
|
|
||||||
'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
|
'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '341413',
|
|
||||||
'ext': 'wvm',
|
|
||||||
'title': 'Dug\'s Special Mission',
|
|
||||||
'description': 'md5:e86c6f4458214905c1772398fabc93e0',
|
|
||||||
'upload_date': '20150827',
|
|
||||||
'timestamp': 1440644006,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'DRM-protected wvm download',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# this also tests correct video id extraction
|
|
||||||
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
|
|
||||||
'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
|
'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '332861',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '28th SEA Games (5 Show) - Episode 11',
|
|
||||||
'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
|
|
||||||
'upload_date': '20150605',
|
|
||||||
'timestamp': 1433480166,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': 'DRM-protected wvm download',
|
|
||||||
},
|
|
||||||
'skip': 'm3u8 links are geo-restricted',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
|
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -21,7 +21,7 @@ def _perform_login(self, username, password):
|
||||||
if not urlh:
|
if not urlh:
|
||||||
return
|
return
|
||||||
|
|
||||||
content, urlh = self._download_webpage_handle(
|
response = self._download_webpage_handle(
|
||||||
urlh.url, None, fatal=False, headers={'referer': urlh.url},
|
urlh.url, None, fatal=False, headers={'referer': urlh.url},
|
||||||
note='logging in', errnote='unable to log in',
|
note='logging in', errnote='unable to log in',
|
||||||
data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
|
@ -30,7 +30,11 @@ def _perform_login(self, username, password):
|
||||||
'j_username': username,
|
'j_username': username,
|
||||||
'j_password': password,
|
'j_password': password,
|
||||||
}))
|
}))
|
||||||
if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
|
if not response:
|
||||||
|
return
|
||||||
|
|
||||||
|
content, urlh = response
|
||||||
|
if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
|
||||||
return
|
return
|
||||||
|
|
||||||
if not self._html_search_regex(
|
if not self._html_search_regex(
|
||||||
|
@ -39,7 +43,7 @@ def _perform_login(self, username, password):
|
||||||
self.report_warning('unable to login: incorrect password')
|
self.report_warning('unable to login: incorrect password')
|
||||||
return
|
return
|
||||||
|
|
||||||
content, urlh = self._download_webpage_handle(
|
urlh = self._request_webpage(
|
||||||
urlh.url, None, fatal=False, headers={'referer': urlh.url},
|
urlh.url, None, fatal=False, headers={'referer': urlh.url},
|
||||||
note='logging in with TFA', errnote='unable to log in with TFA',
|
note='logging in with TFA', errnote='unable to log in with TFA',
|
||||||
data=urlencode_postdata({
|
data=urlencode_postdata({
|
||||||
|
|
|
@ -96,7 +96,7 @@ def _extract_subtitles(data_captions):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||||
|
|
||||||
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
||||||
self.raise_geo_restricted(countries=['FR'])
|
self.raise_geo_restricted(countries=['FR'])
|
||||||
|
@ -122,8 +122,9 @@ def process_video_files(v):
|
||||||
if not token:
|
if not token:
|
||||||
continue
|
continue
|
||||||
deferred_json = self._download_json(
|
deferred_json = self._download_json(
|
||||||
f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
|
f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true',
|
||||||
note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
|
display_id, 'Downloading deferred info', fatal=False, impersonate=True,
|
||||||
|
headers={'Authorization': f'Bearer {token}'})
|
||||||
v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
|
v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
|
||||||
if not v_url:
|
if not v_url:
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -1,60 +1,29 @@
|
||||||
import functools
|
import functools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from .brightcove import BrightcoveNewIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
|
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
|
||||||
from ..utils.traversal import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class TVAIE(InfoExtractor):
|
class TVAIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
|
IE_NAME = 'tvaplus'
|
||||||
|
IE_DESC = 'TVA+'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P<id>\d+)(?:$|[#?])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://videos.tva.ca/details/_5596811470001',
|
'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619',
|
||||||
'info_dict': {
|
|
||||||
'id': '5596811470001',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
|
|
||||||
'uploader_id': '5481942443001',
|
|
||||||
'upload_date': '20171003',
|
|
||||||
'timestamp': 1507064617,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'HTTP Error 404: Not Found',
|
|
||||||
}, {
|
|
||||||
'url': 'https://video.tva.ca/details/_5596811470001',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
|
|
||||||
'ie_key': 'BrightcoveNew',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class QubIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
|
|
||||||
'md5': '949490fd0e7aee11d0543777611fbd53',
|
'md5': '949490fd0e7aee11d0543777611fbd53',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6084352463001',
|
'id': '6084352463001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ép 01. Mon dernier jour',
|
'title': 'Mon dernier jour',
|
||||||
'uploader_id': '5481942443001',
|
'uploader_id': '5481942443001',
|
||||||
'upload_date': '20190907',
|
'upload_date': '20190907',
|
||||||
'timestamp': 1567899756,
|
'timestamp': 1567899756,
|
||||||
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
|
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
|
||||||
'thumbnail': r're:https://.+\.jpg',
|
'thumbnail': r're:https://.+\.jpg',
|
||||||
'episode': 'Ép 01. Mon dernier jour',
|
'episode': 'Mon dernier jour',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
|
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
|
||||||
'duration': 2625.963,
|
'duration': 2625.963,
|
||||||
|
@ -64,23 +33,36 @@ class QubIE(InfoExtractor):
|
||||||
'channel': 'TVA',
|
'channel': 'TVA',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
|
'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '6354448043112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Le Baiser du barbu',
|
||||||
|
'uploader_id': '5481942443001',
|
||||||
|
'upload_date': '20240606',
|
||||||
|
'timestamp': 1717694023,
|
||||||
|
'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57',
|
||||||
|
'thumbnail': r're:https://.+\.jpg',
|
||||||
|
'episode': 'Le Baiser du barbu',
|
||||||
|
'tags': ['fullepisode', 'films'],
|
||||||
|
'duration': 6053.504,
|
||||||
|
'series': 'Le Baiser du barbu',
|
||||||
|
'channel': 'TVA',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
# reference_id also works with old account_id(5481942443001)
|
_BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}'
|
||||||
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
entity_id = self._match_id(url)
|
entity_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, entity_id)
|
webpage = self._download_webpage(url, entity_id)
|
||||||
entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
|
entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity']
|
||||||
video_id = entity['videoId']
|
video_id = entity['videoId']
|
||||||
episode = strip_or_none(entity.get('name'))
|
episode = strip_or_none(entity.get('name'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': f'https://videos.tva.ca/details/_{video_id}',
|
'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}),
|
||||||
'ie_key': TVAIE.ie_key(),
|
'ie_key': BrightcoveNewIE.ie_key(),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': episode,
|
'title': episode,
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
|
|
||||||
|
|
||||||
class TVerIE(InfoExtractor):
|
class TVerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'skip': 'videos are only available for 7 days',
|
'skip': 'videos are only available for 7 days',
|
||||||
'url': 'https://tver.jp/episodes/ep83nf3w4p',
|
'url': 'https://tver.jp/episodes/ep83nf3w4p',
|
||||||
|
@ -23,6 +23,20 @@ class TVerIE(InfoExtractor):
|
||||||
'channel': 'テレビ朝日',
|
'channel': 'テレビ朝日',
|
||||||
},
|
},
|
||||||
'add_ie': ['BrightcoveNew'],
|
'add_ie': ['BrightcoveNew'],
|
||||||
|
}, {
|
||||||
|
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6359578055112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
|
||||||
|
'timestamp': 1722279928,
|
||||||
|
'upload_date': '20240729',
|
||||||
|
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
|
||||||
|
'uploader_id': '4774017240001',
|
||||||
|
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
|
||||||
|
'duration': 670.571,
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tver.jp/corner/f0103888',
|
'url': 'https://tver.jp/corner/f0103888',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -47,7 +61,15 @@ def _real_initialize(self):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||||
if video_type not in {'series', 'episodes'}:
|
|
||||||
|
if video_type == 'olympic/paris2024/video':
|
||||||
|
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
|
||||||
|
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
|
||||||
|
return self.url_result(smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
|
||||||
|
{'geo_countries': ['JP']}), 'BrightcoveNew')
|
||||||
|
|
||||||
|
elif video_type not in {'series', 'episodes'}:
|
||||||
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
|
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
|
||||||
video_id = self._match_id(self._search_regex(
|
video_id = self._match_id(self._search_regex(
|
||||||
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
|
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_field,
|
format_field,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
remove_end,
|
remove_end,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -107,7 +108,7 @@ def _extract_variant_formats(self, variant, video_id):
|
||||||
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
|
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
|
||||||
f = {
|
f = {
|
||||||
'url': variant_url,
|
'url': variant_url,
|
||||||
'format_id': 'http' + (f'-{tbr}' if tbr else ''),
|
'format_id': join_nonempty('http', tbr),
|
||||||
'tbr': tbr,
|
'tbr': tbr,
|
||||||
}
|
}
|
||||||
self._search_dimensions_in_video_url(f, variant_url)
|
self._search_dimensions_in_video_url(f, variant_url)
|
||||||
|
|
|
@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||||
r'amazon\.(?:\w{2}\.)?\w+/gp/video',
|
r'amazon\.(?:\w{2}\.)?\w+/gp/video',
|
||||||
r'music\.amazon\.(?:\w{2}\.)?\w+',
|
r'music\.amazon\.(?:\w{2}\.)?\w+',
|
||||||
r'(?:watch|front)\.njpwworld\.com',
|
r'(?:watch|front)\.njpwworld\.com',
|
||||||
|
r'qub\.ca/vrai',
|
||||||
)
|
)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
426
yt_dlp/extractor/vidyard.py
Normal file
426
yt_dlp/extractor/vidyard.py
Normal file
|
@ -0,0 +1,426 @@
|
||||||
|
import functools
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
|
mimetype2ext,
|
||||||
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class VidyardBaseIE(InfoExtractor):
|
||||||
|
_HEADERS = {'Referer': 'https://play.vidyard.com/'}
|
||||||
|
|
||||||
|
def _get_formats_and_subtitles(self, sources, video_id):
|
||||||
|
formats, subtitles = [], {}
|
||||||
|
|
||||||
|
def add_hls_fmts_and_subs(m3u8_url):
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
m3u8_url, video_id, 'mp4', m3u8_id='hls', headers=self._HEADERS, fatal=False)
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
|
||||||
|
hls_list = isinstance(sources, dict) and sources.pop('hls', None)
|
||||||
|
if master_m3u8_url := traverse_obj(
|
||||||
|
hls_list, (lambda _, v: v['profile'] == 'auto', 'url', {url_or_none}, any)):
|
||||||
|
add_hls_fmts_and_subs(master_m3u8_url)
|
||||||
|
if not formats: # These are duplicate and unnecesary requests if we got 'auto' hls fmts
|
||||||
|
for variant_m3u8_url in traverse_obj(hls_list, (..., 'url', {url_or_none})):
|
||||||
|
add_hls_fmts_and_subs(variant_m3u8_url)
|
||||||
|
|
||||||
|
for source_type, source_list in traverse_obj(sources, ({dict.items}, ...)):
|
||||||
|
for source in traverse_obj(source_list, lambda _, v: url_or_none(v['url'])):
|
||||||
|
profile = source.get('profile')
|
||||||
|
formats.append({
|
||||||
|
'url': source['url'],
|
||||||
|
'ext': mimetype2ext(source.get('mimeType'), default=None),
|
||||||
|
'format_id': join_nonempty('http', source_type, profile),
|
||||||
|
**parse_resolution(profile),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._remove_duplicate_formats(formats)
|
||||||
|
return formats, subtitles
|
||||||
|
|
||||||
|
def _get_direct_subtitles(self, caption_json):
|
||||||
|
subs = {}
|
||||||
|
for caption in traverse_obj(caption_json, lambda _, v: url_or_none(v['vttUrl'])):
|
||||||
|
subs.setdefault(caption.get('language') or 'und', []).append({
|
||||||
|
'url': caption['vttUrl'],
|
||||||
|
'name': caption.get('name'),
|
||||||
|
})
|
||||||
|
|
||||||
|
return subs
|
||||||
|
|
||||||
|
def _fetch_video_json(self, video_id):
|
||||||
|
return self._download_json(
|
||||||
|
f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']
|
||||||
|
|
||||||
|
def _process_video_json(self, json_data, video_id):
|
||||||
|
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id)
|
||||||
|
self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles)
|
||||||
|
|
||||||
|
return {
|
||||||
|
**traverse_obj(json_data, {
|
||||||
|
'id': ('facadeUuid', {str}),
|
||||||
|
'display_id': ('videoId', {int}, {str_or_none}),
|
||||||
|
'title': ('name', {str}),
|
||||||
|
'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}),
|
||||||
|
'duration': ((
|
||||||
|
('milliseconds', {functools.partial(float_or_none, scale=1000)}),
|
||||||
|
('seconds', {int_or_none})), any),
|
||||||
|
'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}),
|
||||||
|
'tags': ('tags', ..., 'name', {str}),
|
||||||
|
}),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'http_headers': self._HEADERS,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VidyardIE(VidyardBaseIE):
|
||||||
|
_VALID_URL = [
|
||||||
|
r'https?://[\w-]+(?:\.hubs)?\.vidyard\.com/watch/(?P<id>[\w-]+)',
|
||||||
|
r'https?://(?:embed|share)\.vidyard\.com/share/(?P<id>[\w-]+)',
|
||||||
|
r'https?://play\.vidyard\.com/(?:player/)?(?P<id>[\w-]+)',
|
||||||
|
]
|
||||||
|
_EMBED_REGEX = [r'<iframe[^>]* src=["\'](?P<url>(?:https?:)?//play\.vidyard\.com/[\w-]+)']
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'oTDMPlUv--51Th455G5u7Q',
|
||||||
|
'display_id': '50347',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Homepage Video',
|
||||||
|
'description': 'Look I changed the description.',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
|
||||||
|
'duration': 99,
|
||||||
|
'tags': ['these', 'are', 'all', 'tags'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://share.vidyard.com/watch/PaQzDAT1h8JqB8ivEu2j6Y?',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PaQzDAT1h8JqB8ivEu2j6Y',
|
||||||
|
'display_id': '9281024',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Inline Embed',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
|
||||||
|
'duration': 41.186,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'oTDMPlUv--51Th455G5u7Q',
|
||||||
|
'display_id': '50347',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Homepage Video',
|
||||||
|
'description': 'Look I changed the description.',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
|
||||||
|
'duration': 99,
|
||||||
|
'tags': ['these', 'are', 'all', 'tags'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# First video from playlist below
|
||||||
|
'url': 'https://embed.vidyard.com/share/SyStyHtYujcBHe5PkZc5DL',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SyStyHtYujcBHe5PkZc5DL',
|
||||||
|
'display_id': '41974005',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||||
|
'description': r're:In this video, you will learn how to prepare the frame.+',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
|
||||||
|
'duration': 258.666,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Playlist
|
||||||
|
'url': 'https://thelink.hubs.vidyard.com/watch/pwu7pCYWSwAnPxs8nDoFrE',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pwu7pCYWSwAnPxs8nDoFrE',
|
||||||
|
'title': 'PLAYLIST - Palm Beach Shutters- Bi-Fold Track System Installation',
|
||||||
|
'entries': [{
|
||||||
|
'id': 'SyStyHtYujcBHe5PkZc5DL',
|
||||||
|
'display_id': '41974005',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
|
||||||
|
'duration': 258.666,
|
||||||
|
}, {
|
||||||
|
'id': '1Fw4B84jZTXLXWqkE71RiM',
|
||||||
|
'display_id': '5861113',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Palm Beach - Bi-Fold Track System "Frame Installation"',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861113/29CJ54s5g1_aP38zkKLHew_small.jpg',
|
||||||
|
'duration': 167.858,
|
||||||
|
}, {
|
||||||
|
'id': 'DqP3wBvLXSpxrcqpT5kEeo',
|
||||||
|
'display_id': '41976334',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Install the Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861090/RwG2VaTylUa6KhSTED1r1Q_small.png',
|
||||||
|
'duration': 94.229,
|
||||||
|
}, {
|
||||||
|
'id': 'opfybfxpzQArxqtQYB6oBU',
|
||||||
|
'display_id': '41976364',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Install the Panel for Palm Beach Polysatin Shutters With BiFold Track',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860926/JIOaJR08dM4QgXi_iQ2zGA_small.png',
|
||||||
|
'duration': 191.467,
|
||||||
|
}, {
|
||||||
|
'id': 'rWrXvkbTNNaNqD6189HJya',
|
||||||
|
'display_id': '41976382',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Adjust the Panels for Palm Beach Polysatin Shutters With BiFold Track',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860687/CwHxBv4UudAhOh43FVB4tw_small.png',
|
||||||
|
'duration': 138.155,
|
||||||
|
}, {
|
||||||
|
'id': 'eYPTB521MZ9TPEArSethQ5',
|
||||||
|
'display_id': '41976409',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Assemble and Install the Valance for Palm Beach Polysatin Shutters With BiFold Track',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861425/0y68qlMU4O5VKU7bJ8i_AA_small.png',
|
||||||
|
'duration': 148.224,
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
'playlist_count': 6,
|
||||||
|
}, {
|
||||||
|
# Non hubs.vidyard.com playlist
|
||||||
|
'url': 'https://salesforce.vidyard.com/watch/d4vqPjs7Q5EzVEis5QT3jd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd4vqPjs7Q5EzVEis5QT3jd',
|
||||||
|
'title': 'How To: Service Cloud: Import External Content in Lightning Knowledge',
|
||||||
|
'entries': [{
|
||||||
|
'id': 'mcjDpSZir2iSttbvFkx6Rv',
|
||||||
|
'display_id': '29479036',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Welcome to this Expert Coaching Series',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/ouyQi9WuwyiOupChUWNmjQ/7170d3485ba602e012df05_small.jpg',
|
||||||
|
'duration': 38.205,
|
||||||
|
}, {
|
||||||
|
'id': '84bPYwpg243G6xYEfJdYw9',
|
||||||
|
'display_id': '21820704',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chapter 1 - Title + Agenda',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/HFPN0ZgQq4Ow8BghGcQSow/bfaa30123c8f6601e7d7f2_small.jpg',
|
||||||
|
'duration': 98.016,
|
||||||
|
}, {
|
||||||
|
'id': 'nP17fMuvA66buVHUrzqjTi',
|
||||||
|
'display_id': '21820707',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chapter 2 - Import Options',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rGRIF5nFjPI9OOA2qJ_Dbg/86a8d02bfec9a566845dd4_small.jpg',
|
||||||
|
'duration': 199.136,
|
||||||
|
}, {
|
||||||
|
'id': 'm54EcwXdpA5gDBH5rgCYoV',
|
||||||
|
'display_id': '21820710',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chapter 3 - Importing Article Translations',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/IVX4XR8zpSsiNIHx45kz-A/1ccbf8a29a33856d06b3ed_small.jpg',
|
||||||
|
'duration': 184.352,
|
||||||
|
}, {
|
||||||
|
'id': 'j4nzS42oq4hE9oRV73w3eQ',
|
||||||
|
'display_id': '21820716',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chapter 4 - Best Practices',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/BtrRrQpRDLbA4AT95YQyog/1f1e6b8e7fdc3fa95ec8d3_small.jpg',
|
||||||
|
'duration': 296.960,
|
||||||
|
}, {
|
||||||
|
'id': 'y28PYfW5pftvers9PXzisC',
|
||||||
|
'display_id': '21820727',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chapter 5 - Migration Steps',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/K2CdQOXDfLcrVTF60r0bdw/a09239ada28b6ffce12b1f_small.jpg',
|
||||||
|
'duration': 620.640,
|
||||||
|
}, {
|
||||||
|
'id': 'YWU1eQxYvhj29SjYoPw5jH',
|
||||||
|
'display_id': '21820733',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Chapter 6 - Demo',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rsmhP-cO8dAa8ilvFGCX0g/7911ef415167cd14032068_small.jpg',
|
||||||
|
'duration': 631.456,
|
||||||
|
}, {
|
||||||
|
'id': 'nmEvVqpwdJUgb74zKsLGxn',
|
||||||
|
'display_id': '29479037',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Schedule Your Follow-Up',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/Rtwc7X4PEkF4Ae5kHi-Jvw/174ebed3f34227b1ffa1d0_small.jpg',
|
||||||
|
'duration': 33.608,
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
'playlist_count': 8,
|
||||||
|
}, {
|
||||||
|
# URL of iframe embed src
|
||||||
|
'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'iDqTwWGrd36vaLuaCY3nTs',
|
||||||
|
'display_id': '9281009',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Lightbox Embed',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
|
||||||
|
'duration': 39.035,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Player JSON URL
|
||||||
|
'url': 'https://play.vidyard.com/player/7GAApnNNbcZZ46k6JqJQSh.json?disable_analytics=0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7GAApnNNbcZZ46k6JqJQSh',
|
||||||
|
'display_id': '820026',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Art of Storytelling: How to Deliver Your Brand Story with Content & Social',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/MhbE-5sEFQu4x3fI6FkNlA/41eb5717c557cd19456910_small.jpg',
|
||||||
|
'duration': 2153.013,
|
||||||
|
'tags': ['Summit2017'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://share.vidyard.com/share/diYeo6YR2yiGgL8odvS8Ri',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.vidyard.com/FFlz3ZpxhIfKQ1fd9DAryA',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.vidyard.com/qhMAu5A76GZVrFzOPgSf9A/type/standalone',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_WEBPAGE_TESTS = [{
|
||||||
|
# URL containing inline/lightbox embedded video
|
||||||
|
'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'GDx1oXrFWj4XHbipfoXaMn',
|
||||||
|
'display_id': '3225198',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The Extreme Importance of PC Board Stack Up',
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/73_Q3_hBexWX7Og1sae6cg/9998fa4faec921439e2c04_small.jpg',
|
||||||
|
'duration': 3422.742,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# <script ... id="vidyard_embed_code_DXx2sW4WaLA6hTdGFz7ja8" src="//play.vidyard.com/DXx2sW4WaLA6hTdGFz7ja8.js?
|
||||||
|
'url': 'http://videos.vivint.com/watch/DXx2sW4WaLA6hTdGFz7ja8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'DXx2sW4WaLA6hTdGFz7ja8',
|
||||||
|
'display_id': '2746529',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'How To Powercycle the Smart Hub Panel',
|
||||||
|
'duration': 30.613,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/_-6cw8xQUJ3qiCs_JENc_A/b21d7a5e47967f49399d30_small.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# <script id="vidyard_embed_code_MIBHhiLVTxga7wqLsuoDjQ" src="//embed.vidyard.com/embed/MIBHhiLVTxga7wqLsuoDjQ/inline?v=2.1">
|
||||||
|
'url': 'https://www.babypips.com/learn/forex/introduction-to-metatrader4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MIBHhiLVTxga7wqLsuoDjQ',
|
||||||
|
'display_id': '20291',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Lesson 1 - Opening an MT4 Account',
|
||||||
|
'description': 'Never heard of MetaTrader4? Here\'s the 411 on the popular trading platform!',
|
||||||
|
'duration': 168,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/20291/IM-G2WXQR9VBLl2Cmzvftg_small.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# <iframe ... src="//play.vidyard.com/d61w8EQoZv1LDuPxDkQP2Q/type/background?preview=1"
|
||||||
|
'url': 'https://www.avaya.com/en/',
|
||||||
|
'info_dict': {
|
||||||
|
# These values come from the generic extractor and don't matter
|
||||||
|
'id': str,
|
||||||
|
'title': str,
|
||||||
|
'age_limit': 0,
|
||||||
|
'upload_date': str,
|
||||||
|
'description': str,
|
||||||
|
'thumbnail': str,
|
||||||
|
'timestamp': float,
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd61w8EQoZv1LDuPxDkQP2Q',
|
||||||
|
'display_id': '42456529',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'GettyImages-1027',
|
||||||
|
'duration': 6.0,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42061563/p6bY08d2N4e4IDz-7J4_wkgsPq3-qgcx_small.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VAsYDi7eiqZRbHodUA2meC',
|
||||||
|
'display_id': '42456569',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'GettyImages-1325598833',
|
||||||
|
'duration': 6.083,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42052358/y3qrbDpn_2quWr_5XBi7yzS3UvEI__ZM_small.jpg',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
# <div class="vidyard-player-embed" data-uuid="vpCWTVHw3qrciLtVY94YkS"
|
||||||
|
'url': 'https://www.gogoair.com/',
|
||||||
|
'info_dict': {
|
||||||
|
# These values come from the generic extractor and don't matter
|
||||||
|
'id': str,
|
||||||
|
'title': str,
|
||||||
|
'description': str,
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'vpCWTVHw3qrciLtVY94YkS',
|
||||||
|
'display_id': '40780699',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Upgrade to AVANCE 100% worth it - Jason Talley, Owner and Pilot, Testimonial',
|
||||||
|
'description': 'md5:f609824839439a51990cef55ffc472aa',
|
||||||
|
'duration': 70.737,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/40780699/KzjfYZz5MZl2gHF_e-4i2c6ib1cLDweQ_small.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'xAmV9AsLbnitCw35paLBD8',
|
||||||
|
'display_id': '31130867',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Brad Keselowski goes faster with Gogo AVANCE inflight Wi-Fi',
|
||||||
|
'duration': 132.565,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/31130867/HknyDtLdm2Eih9JZ4A5XLjhfBX_6HRw5_small.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'RkkrFRNxfP79nwCQavecpF',
|
||||||
|
'display_id': '39009815',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Live Demo of Gogo Galileo',
|
||||||
|
'description': 'md5:e2df497236f4e12c3fef8b392b5f23e0',
|
||||||
|
'duration': 112.128,
|
||||||
|
'thumbnail': 'https://cdn.vidyard.com/thumbnails/38144873/CWLlxfUbJ4Gh0ThbUum89IsEM4yupzMb_small.jpg',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'playlist_count': 3,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_embed_urls(cls, url, webpage):
|
||||||
|
# Handle protocol-less embed URLs
|
||||||
|
for embed_url in super()._extract_embed_urls(url, webpage):
|
||||||
|
if embed_url.startswith('//'):
|
||||||
|
embed_url = f'https:{embed_url}'
|
||||||
|
yield embed_url
|
||||||
|
|
||||||
|
# Extract inline/lightbox embeds
|
||||||
|
for embed_element in re.findall(
|
||||||
|
r'(<(?:img|div)[^>]* class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+>)', webpage):
|
||||||
|
if video_id := extract_attributes(embed_element[0]).get('data-uuid'):
|
||||||
|
yield f'https://play.vidyard.com/{video_id}'
|
||||||
|
|
||||||
|
for embed_id in re.findall(r'<script[^>]* id=["\']vidyard_embed_code_([\w-]+)["\']', webpage):
|
||||||
|
yield f'https://play.vidyard.com/{embed_id}'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
video_json = self._fetch_video_json(video_id)
|
||||||
|
|
||||||
|
if len(video_json['chapters']) == 1:
|
||||||
|
return self._process_video_json(video_json['chapters'][0], video_id)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
[self._process_video_json(chapter, video_id) for chapter in video_json['chapters']],
|
||||||
|
str(video_json['playerUuid']), video_json.get('name'))
|
|
@ -5,6 +5,7 @@
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
)
|
)
|
||||||
|
@ -120,7 +121,7 @@ def _real_extract(self, url):
|
||||||
'height', default=None))
|
'height', default=None))
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_asset_url,
|
'url': video_asset_url,
|
||||||
'format_id': 'http{}'.format(f'-{bitrate}' if bitrate else ''),
|
'format_id': join_nonempty('http', bitrate),
|
||||||
'tbr': bitrate,
|
'tbr': bitrate,
|
||||||
'height': height,
|
'height': height,
|
||||||
'vcodec': video_asset.get('codec'),
|
'vcodec': video_asset.get('codec'),
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue