Mirror of https://github.com/yt-dlp/yt-dlp.git, synced 2024-11-19 14:49:15 +00:00

Merge branch 'yt-dlp:master' into pr/6498

This commit is contained in commit 194bc49c55.
.github/ISSUE_TEMPLATE/1_broken_site.yml | 8 (vendored)

@@ -18,7 +18,7 @@ body:
 options:
 - label: I'm reporting that yt-dlp is broken on a **supported** site
 required: true
-- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
@@ -64,7 +64,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -72,8 +72,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2023.06.22, Current version: 2023.06.22
-yt-dlp is up to date (2023.06.22)
+Latest version: 2023.07.06, Current version: 2023.07.06
+yt-dlp is up to date (2023.07.06)
 <more lines>
 render: shell
 validations:
.github/ISSUE_TEMPLATE/2_site_support_request.yml

@@ -18,7 +18,7 @@ body:
 options:
 - label: I'm reporting a new site support request
 required: true
-- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
@@ -76,7 +76,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -84,8 +84,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2023.06.22, Current version: 2023.06.22
-yt-dlp is up to date (2023.06.22)
+Latest version: 2023.07.06, Current version: 2023.07.06
+yt-dlp is up to date (2023.07.06)
 <more lines>
 render: shell
 validations:
.github/ISSUE_TEMPLATE/3_site_feature_request.yml

@@ -18,7 +18,7 @@ body:
 options:
 - label: I'm requesting a site-specific feature
 required: true
-- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
@@ -72,7 +72,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -80,8 +80,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2023.06.22, Current version: 2023.06.22
-yt-dlp is up to date (2023.06.22)
+Latest version: 2023.07.06, Current version: 2023.07.06
+yt-dlp is up to date (2023.07.06)
 <more lines>
 render: shell
 validations:
.github/ISSUE_TEMPLATE/4_bug_report.yml | 8 (vendored)

@@ -18,7 +18,7 @@ body:
 options:
 - label: I'm reporting a bug unrelated to a specific site
 required: true
-- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
 required: true
@@ -57,7 +57,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -65,8 +65,8 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2023.06.22, Current version: 2023.06.22
-yt-dlp is up to date (2023.06.22)
+Latest version: 2023.07.06, Current version: 2023.07.06
+yt-dlp is up to date (2023.07.06)
 <more lines>
 render: shell
 validations:
.github/ISSUE_TEMPLATE/5_feature_request.yml | 8 (vendored)

@@ -20,7 +20,7 @@ body:
 required: true
 - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
 required: true
-- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
 required: true
@@ -53,7 +53,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -61,7 +61,7 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2023.06.22, Current version: 2023.06.22
-yt-dlp is up to date (2023.06.22)
+Latest version: 2023.07.06, Current version: 2023.07.06
+yt-dlp is up to date (2023.07.06)
 <more lines>
 render: shell
.github/ISSUE_TEMPLATE/6_question.yml | 8 (vendored)

@@ -26,7 +26,7 @@ body:
 required: true
 - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
 required: true
-- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
+- label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
 required: true
 - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
 required: true
@@ -59,7 +59,7 @@ body:
 [debug] Command-line config: ['-vU', 'test:youtube']
 [debug] Portable config "yt-dlp.conf": ['-i']
 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
-[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
+[debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
 [debug] Checking exe version: ffmpeg -bsfs
 [debug] Checking exe version: ffprobe -bsfs
@@ -67,7 +67,7 @@ body:
 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
 [debug] Proxy map: {}
 [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
-Latest version: 2023.06.22, Current version: 2023.06.22
-yt-dlp is up to date (2023.06.22)
+Latest version: 2023.07.06, Current version: 2023.07.06
+yt-dlp is up to date (2023.07.06)
 <more lines>
 render: shell
.github/workflows/codeql.yml | 65 (vendored, new file)

@@ -0,0 +1,65 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ 'master', 'gh-pages', 'release' ]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: [ 'master' ]
+  schedule:
+    - cron: '59 11 * * 5'
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: [ 'python' ]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # Use only 'java' to analyze code written in Java, Kotlin or both
+        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
+        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v3
+
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v2
+      with:
+        languages: ${{ matrix.language }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+        # queries: security-extended,security-and-quality
+
+
+    # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
+    # If this step fails, then you should remove it and run the build manually (see below)
+    - name: Autobuild
+      uses: github/codeql-action/autobuild@v2
+
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+    # If the Autobuild fails above, remove it and uncomment the following three lines.
+    # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
+
+    # - run: |
+    #   echo "Run, Build Application using script"
+    #   ./location_of_script_within_repo/buildscript.sh
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v2
+      with:
+        category: "/language:${{matrix.language}}"
CONTRIBUTORS

@@ -460,3 +460,10 @@ berkanteber
 OverlordQ
 rexlambert22
 Ti4eeT4e
+AmanSal1
+bbilly1
+meliber
+nnoboa
+rdamas
+RfadnjdExt
+urectanc
Changelog.md | 58

@@ -4,11 +4,65 @@ # Changelog
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->

+### 2023.07.06
+
+#### Important changes
+- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)
+    - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains
+    - Cookies are scoped when passed to external downloaders
+    - Add `cookies` field to info.json and deprecate `http_headers.Cookie`
+
+#### Core changes
+- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
+- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan)
+- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K)
+- [Prevent `Cookie` leaks on HTTP redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz)
+- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan)
+- **outtmpl**: [Pad `playlist_index` etc even when with internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) by [pukkandan](https://github.com/pukkandan)
+- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan)
+
+#### Extractor changes
+- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber)
+- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa)
+- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas)
+- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly)
+- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt)
+- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly)
+- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly)
+- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan)
+- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc)
+- **twitter**
+    - [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly)
+    - spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly)
+- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan)
+- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt)
+- **youtube**
+    - [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) by [pukkandan](https://github.com/pukkandan)
+    - [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
+    - [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1)
+    - [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz)
+    - [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan)
+    - stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan)
+    - tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Downloader changes
+- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan)
+- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
+- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan)
+
+#### Misc. changes
+- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf)
+- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan)
+- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan)
+- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan)
+
 ### 2023.06.22

 #### Core changes
 - [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan)
 - [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan)
     - Support negative time-ranges
     - Add `*from-url` to obey time-ranges in URL
 - [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan)

 #### Extractor changes

@@ -19,7 +73,7 @@ #### Extractor changes
 - **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb)
 - **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk)
 - **youtube**
-    - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142)
+    - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan)
         - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively
         - IOS also has higher bit-rate 'premium' formats though they are not labeled as such
     - [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan)

@@ -27,7 +81,7 @@ #### Extractor changes
 - [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan)

 #### Misc. changes
-- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700)
+- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan)
 - **cleanup**
     - Miscellaneous
         - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)
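To make the info.json change above concrete: downstream consumers should read the new `cookies` field instead of `http_headers.Cookie`. A minimal sketch (the filename is a placeholder; the cookie string follows the `Name=value; Domain=...` form used in yt-dlp's own tests):

```python
import json

# Placeholder filename: any info.json written by yt-dlp >= 2023.07.06
with open('example.info.json') as f:
    info = json.load(f)

# New, scoped location, e.g. 'a=b; Domain=.example.com; c=d; Domain=.example.com'
scoped_cookies = info.get('cookies')

# Deprecated, unscoped location that enabled the CVE-2023-35934 leak
legacy_cookie_header = info.get('http_headers', {}).get('Cookie')
```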
Makefile | 2

@@ -74,7 +74,7 @@ offlinetest: codetest
	$(PYTHON) -m pytest -k "not download"

# XXX: This is hard to maintain
-CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies
+CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
	mkdir -p zip
	for d in $(CODE_FOLDERS) ; do \
README.md | 21

@@ -12,7 +12,7 @@
 [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License")
 [![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
 [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
-[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
+[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/pulse/monthly "Last activity")

 </div>
 <!-- MANPAGE: END EXCLUDED SECTION -->

@@ -76,7 +76,7 @@

 # NEW FEATURES

-* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/yt-dlp/yt-dlp/commit/42f2d4) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
+* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
@@ -1323,7 +1323,7 @@ # OUTPUT TEMPLATE
 - `extractor` (string): Name of the extractor
 - `extractor_key` (string): Key name of the extractor
 - `epoch` (numeric): Unix epoch of when the information extraction was completed
-- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
+- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits
 - `video_autonumber` (numeric): Number that will be increased with each video
 - `n_entries` (numeric): Total number of extracted items in the playlist
 - `playlist_id` (string): Identifier of the playlist that contains the video
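As a quick illustration of the padded `autonumber` field, a sketch through the Python API (placeholder URL; `outtmpl` mirrors the CLI's `-o`):

```python
import yt_dlp

# With the default --autonumber-start of 1, the first file becomes
# '00001 - <title> [<id>].<ext>': autonumber is zero-padded to 5 digits
opts = {'outtmpl': '%(autonumber)s - %(title)s [%(id)s].%(ext)s'}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```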
@@ -1509,7 +1509,7 @@ # FORMAT SELECTION

 ## Filtering Formats

-You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
+You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"` since filters without a selector are interpreted as `best`).

 The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):

@@ -1545,7 +1545,7 @@ ## Filtering Formats

 **Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.

-Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.

 Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
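The same filter works through the Python API; a sketch with a placeholder URL:

```python
import yt_dlp

# 'bv[height<=?720][tbr>500]': best video up to 720p (unknown heights pass
# because of the '?') with total bitrate above 500 KBit/s; '+ba/b' merges
# best audio and falls back to the best combined format
opts = {'format': 'bv[height<=?720][tbr>500]+ba/b'}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```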
@@ -1569,7 +1569,7 @@ ## Sorting Formats
 - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
 - `ext`: Equivalent to `vext,aext`
 - `filesize`: Exact filesize, if known in advance
-- `fs_approx`: Approximate filesize calculated from the manifests
+- `fs_approx`: Approximate filesize
 - `size`: Exact filesize if available, otherwise approximate filesize
 - `height`: Height of video
 - `width`: Width of video

@@ -1580,7 +1580,7 @@ ## Sorting Formats
 - `tbr`: Total average bitrate in KBit/s
 - `vbr`: Average video bitrate in KBit/s
 - `abr`: Average audio bitrate in KBit/s
-- `br`: Equivalent to using `tbr,vbr,abr`
+- `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
 - `asr`: Audio sample rate in Hz

 **Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.
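These sort fields map to `-S` on the CLI and to `format_sort` in the API; a sketch (placeholder URL):

```python
import yt_dlp

# Equivalent to '-S filesize,tbr': prefer an exact filesize when known,
# then break ties on total average bitrate
with yt_dlp.YoutubeDL({'format_sort': ['filesize', 'tbr']}) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```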
@@ -1805,8 +1805,7 @@ #### youtube
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
 * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
     * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
-* `include_duplicate_formats`: Extract formats with identical content but different URLs or protocol. This is useful if some of the formats are unavailable or throttled.
-* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8)
+* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
 * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
 * `innertube_key`: Innertube API key to use for all API requests
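The new `formats` extractor-arg is passed like any other extractor argument; a sketch (CLI form: `--extractor-args "youtube:formats=duplicate"`):

```python
import yt_dlp

# {ie_key: {arg_name: [values]}} is the API shape of --extractor-args
opts = {'extractor_args': {'youtube': {'formats': ['duplicate']}}}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```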
@@ -1856,7 +1855,7 @@ #### rokfinchannel
 #### twitter
 * `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed

-#### wrestleuniverse
+#### stacommu, wrestleuniverse
 * `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage

 #### twitch
@@ -1954,7 +1953,7 @@ # EMBEDDING YT-DLP
     ydl.download(URLS)
 ```

-Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L184).
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L183) or `help(yt_dlp.YoutubeDL)` in a Python shell. If you are already familiar with the CLI, you can use [`devscripts/cli_to_api.py`](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) to translate any CLI switches to `YoutubeDL` params.

 **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)
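Putting the embedding advice together, a minimal sketch (placeholder URL; option keys are documented in yt_dlp/YoutubeDL.py):

```python
import json

import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

with yt_dlp.YoutubeDL({'format': 'bv*+ba/b'}) as ydl:
    info = ydl.extract_info(URLS[0], download=False)
    # extract_info's return value is only dict-like; sanitize_info
    # converts it into a JSON-serializable dict, per the tip above
    print(json.dumps(ydl.sanitize_info(info), indent=2)[:500])
```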
devscripts/changelog_override.json

@@ -1,12 +1,12 @@
 [
     {
         "action": "add",
-        "when": "776d1c3f0c9b00399896dd2e40e78e9a43218109",
+        "when": "29cb20bd563c02671b31dd840139e93dd37150a1",
         "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`"
     },
     {
         "action": "add",
-        "when": "776d1c3f0c9b00399896dd2e40e78e9a43218109",
+        "when": "5038f6d713303e0967d002216e7a88652401c22a",
         "short": "[priority] **YouTube throttling fixes!**"
     },
     {

@@ -38,13 +38,15 @@
     },
     {
         "action": "change",
-        "when": "7b37e8b23691613f331bd4ebc9d639dd6f93c972",
-        "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL"
+        "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56",
+        "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL",
+        "authors": ["pukkandan"]
     },
     {
         "action": "change",
         "when": "1e75d97db21152acc764b30a688e516f04b8a142",
-        "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such"
+        "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such",
+        "authors": ["pukkandan"]
     },
     {
         "action": "change",

@@ -55,6 +57,17 @@
     {
         "action": "change",
         "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700",
-        "short": "[misc] Revert \"Add automatic duplicate issue detection\""
+        "short": "[misc] Revert \"Add automatic duplicate issue detection\"",
+        "authors": ["pukkandan"]
+    },
+    {
+        "action": "add",
+        "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729",
+        "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`"
+    },
+    {
+        "action": "change",
+        "when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
+        "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b"
     }
 ]
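For orientation, a hypothetical sketch of how such override entries could be consumed: "change" entries rewrite the changelog text of the commit named by "when", while "add" entries inject an extra note (an illustration only, not the actual devscripts logic):

```python
import json

# Hypothetical consumer of devscripts/changelog_override.json
with open('devscripts/changelog_override.json') as f:
    overrides = json.load(f)

commits = {'b03fa7834579a01cc5fba48c0e73488a16683d48': {'short': 'old text'}}
notes = []
for entry in overrides:
    if entry['action'] == 'change' and entry.get('when') in commits:
        commits[entry['when']]['short'] = entry['short']
    elif entry['action'] == 'add':
        notes.append(entry['short'])  # surfaced as a release highlight
```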
devscripts/make_changelog.py

@@ -53,7 +53,9 @@ def commit_lookup(cls):
                     'cookies',
                     'core',
                     'dependencies',
+                    'formats',
                     'jsinterp',
+                    'networking',
                     'outtmpl',
                     'plugins',
                     'update',

@@ -68,9 +70,9 @@ def commit_lookup(cls):
                     'misc',
                     'test',
                 },
-                cls.EXTRACTOR: {'extractor'},
-                cls.DOWNLOADER: {'downloader'},
-                cls.POSTPROCESSOR: {'postprocessor'},
+                cls.EXTRACTOR: {'extractor', 'ie'},
+                cls.DOWNLOADER: {'downloader', 'fd'},
+                cls.POSTPROCESSOR: {'postprocessor', 'pp'},
             }.items()
             for name in names
         }

@@ -252,6 +254,7 @@ class CommitRange:
         (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
         ''', re.VERBOSE | re.DOTALL)
     EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
+    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
     FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
     UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

@@ -279,7 +282,7 @@ def _get_commits_and_fixes(self, default_author):
             self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
             f'{self._start}..{self._end}' if self._start else self._end).stdout

-        commits = {}
+        commits, reverts = {}, {}
         fixes = defaultdict(list)
         lines = iter(result.splitlines(False))
         for i, commit_hash in enumerate(lines):

@@ -300,6 +303,11 @@ def _get_commits_and_fixes(self, default_author):
                 logger.debug(f'Reached Release commit, breaking: {commit}')
                 break

+            revert_match = self.REVERT_RE.fullmatch(commit.short)
+            if revert_match:
+                reverts[revert_match.group(1)] = commit
+                continue
+
             fix_match = self.FIXES_RE.search(commit.short)
             if fix_match:
                 commitish = fix_match.group(1)

@@ -307,6 +315,13 @@ def _get_commits_and_fixes(self, default_author):

             commits[commit.hash] = commit

+        for commitish, revert_commit in reverts.items():
+            reverted = commits.pop(commitish, None)
+            if reverted:
+                logger.debug(f'{commit} fully reverted {reverted}')
+            else:
+                commits[revert_commit.hash] = revert_commit
+
         for commitish, fix_commits in fixes.items():
             if commitish in commits:
                 hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
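The effect of the new revert handling, demonstrated standalone with the regex from the diff (the 40-character hash is made up):

```python
import re

REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')

original_hash = 'a' * 40
match = REVERT_RE.fullmatch(f'[misc] Revert {original_hash}')
assert match and match.group(1) == original_hash  # reverted hash is captured

# Mirroring the loop in the diff: popping the target cancels the pair,
# so neither the commit nor its revert appears in the changelog
commits = {original_hash: 'original commit'}
reverted = commits.pop(match.group(1), None)
print(reverted, commits)  # 'original commit' {}
```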
setup.cfg

@@ -8,7 +8,6 @@ ignore = E402,E501,E731,E741,W503
 max_line_length = 120
 per_file_ignores =
     devscripts/lazy_load_template.py: F401
-    yt_dlp/utils/__init__.py: F401, F403


 [autoflake]
setup.py | 3

@@ -65,7 +65,8 @@ def py2exe_params():
         'excludes': ['Crypto', 'Cryptodome'],  # py2exe cannot import Crypto
         'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
         # Modules that are only imported dynamically must be added here
-        'includes': ['yt_dlp.compat._legacy'],
+        'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
+                     'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
     },
     'zipfile': None,
 }
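The reason those modules must be listed explicitly: py2exe only discovers literal import statements, so anything loaded dynamically stays invisible to its dependency scan. An illustrative snippet (not from the codebase):

```python
import importlib

# py2exe's static scan cannot see this module reference, which is why the
# _legacy/_deprecated shims must be named in the 'includes' option above
module_name = 'yt_dlp.compat._legacy'
legacy = importlib.import_module(module_name)
```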
supportedsites.md

@@ -1136,6 +1136,7 @@ # Supported sites
 - **puhutv:serie**
 - **Puls4**
 - **Pyvideo**
+- **QDance**: [*qdance*](## "netrc machine")
 - **QingTing**
 - **qqmusic**: QQ音乐
 - **qqmusic:album**: QQ音乐 - 专辑

@@ -1363,6 +1364,8 @@ # Supported sites
 - **sr:mediathek**: Saarländischer Rundfunk
 - **SRGSSR**
 - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
+- **StacommuLive**: [*stacommu*](## "netrc machine")
+- **StacommuVOD**: [*stacommu*](## "netrc machine")
 - **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
 - **stanfordoc**: Stanford Open ClassRoom
 - **StarTrek**

@@ -1647,6 +1650,8 @@ # Supported sites
 - **vk**: [*vk*](## "netrc machine") VK
 - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos
 - **vk:wallpost**: [*vk*](## "netrc machine")
+- **VKPlay**
+- **VKPlayLive**
 - **vm.tiktok**
 - **Vocaroo**
 - **Vodlocker**

@@ -1800,7 +1805,6 @@ # Supported sites
 - **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
 - **youtube:search_url**: YouTube search URLs with sorting and filter support
 - **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
-- **youtube:stories**: YouTube channel stories; "ytstories:" prefix
 - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
 - **youtube:tab**: YouTube Tabs
 - **youtube:user**: YouTube user videos; "ytuser:" prefix
test/test_YoutubeDL.py

@@ -11,7 +11,7 @@
 import copy
 import json

-from test.helper import FakeYDL, assertRegexpMatches
+from test.helper import FakeYDL, assertRegexpMatches, try_rm
 from yt_dlp import YoutubeDL
 from yt_dlp.compat import compat_os_name
 from yt_dlp.extractor import YoutubeIE

@@ -24,6 +24,7 @@
     int_or_none,
     match_filter_func,
 )
+from yt_dlp.utils.traversal import traverse_obj

 TEST_URL = 'http://localhost/sample.mp4'

@@ -684,7 +685,8 @@ def test(tmpl, expected, *, info=None, **params):
         test('%(id)s.%(ext)s', '1234.mp4')
         test('%(duration_string)s', ('27:46:40', '27-46-40'))
         test('%(resolution)s', '1080p')
-        test('%(playlist_index)s', '001')
+        test('%(playlist_index|)s', '001')
+        test('%(playlist_index&{}!)s', '1!')
         test('%(playlist_autonumber)s', '02')
         test('%(autonumber)s', '00001')
         test('%(autonumber+2)03d', '005', autonumber_start=3)

@@ -1213,6 +1215,129 @@ def _real_extract(self, url):
         self.assertEqual(downloaded['extractor'], 'Video')
         self.assertEqual(downloaded['extractor_key'], 'Video')

+    def test_header_cookies(self):
+        from http.cookiejar import Cookie
+
+        ydl = FakeYDL()
+        ydl.report_warning = lambda *_, **__: None
+
+        def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
+            return Cookie(
+                version or 0, name, value, None, False,
+                domain, bool(domain), bool(domain), path, bool(path),
+                secure, expires, False, None, None, rest={})
+
+        _test_url = 'https://yt.dlp/test'
+
+        def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None):
+            def _test():
+                ydl.cookiejar.clear()
+                ydl._load_cookies(encoded_cookies, autoscope=headers)
+                if headers:
+                    ydl._apply_header_cookies(_test_url)
+                data = {'url': _test_url}
+                ydl._calc_headers(data)
+                self.assertCountEqual(
+                    map(vars, ydl.cookiejar), map(vars, cookies),
+                    'Extracted cookiejar.Cookie is not the same')
+                if not headers:
+                    self.assertEqual(
+                        data.get('cookies'), round_trip or encoded_cookies,
+                        'Cookie is not the same as round trip')
+                ydl.__dict__['_YoutubeDL__header_cookies'] = []
+
+            with self.subTest(msg=encoded_cookies):
+                if not error_re:
+                    _test()
+                    return
+                with self.assertRaisesRegex(Exception, error_re):
+                    _test()
+
+        test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
+        test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed')
+        test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [
+            cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
+            cookie('cookie2', 'value2', domain='.yt.dlp', path='/')])
+        test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [
+            cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
+        test('test="value; "; path=/test; domain=.yt.dlp', [
+            cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
+            round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
+        test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')],
+             round_trip='name=""; Domain=.yt.dlp')
+
+        test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
+        test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax')
+        ydl.deprecated_feature = ydl.report_error
+        test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk')
+
+    def test_infojson_cookies(self):
+        TEST_FILE = 'test_infojson_cookies.info.json'
+        TEST_URL = 'https://example.com/example.mp4'
+        COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
+        COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
+
+        ydl = FakeYDL()
+        ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
+
+        def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
+            fmt = {'url': TEST_URL}
+            if fmts_header_cookies:
+                fmt['http_headers'] = COOKIE_HEADER
+            if cookies_field:
+                fmt['cookies'] = COOKIES
+            return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
+
+        def test(initial_info, note):
+            result = {}
+            result['processed'] = ydl.process_ie_result(initial_info)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=f'No cookies set in cookiejar after initial process when {note}')
+            ydl.cookiejar.clear()
+            with open(TEST_FILE) as infojson:
+                result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
+            result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=f'No cookies set in cookiejar after final process when {note}')
+            ydl.cookiejar.clear()
+            for key in ('processed', 'loaded', 'final'):
+                info = result[key]
+                self.assertIsNone(
+                    traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
+                    msg=f'Cookie header not removed in {key} result when {note}')
+                self.assertEqual(
+                    traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
+                    msg=f'No cookies field found in {key} result when {note}')
+
+        test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
+        test(make_info(info_header_cookies=True), 'info_dict header cookies')
+        test(make_info(fmts_header_cookies=True), 'format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
+        test(make_info(cookies_field=True), 'cookies format field')
+        test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
+
+        try_rm(TEST_FILE)
+
+    def test_add_headers_cookie(self):
+        def check_for_cookie_header(result):
+            return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
+
+        ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
+        ydl._apply_header_cookies(_make_result([])['webpage_url'])  # Scope to input webpage URL: .example.com
+
+        fmt = {'url': 'https://example.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
+        self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
+        self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
+
+        fmt = {'url': 'https://wrong.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
+        self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
+        self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
+

 if __name__ == '__main__':
     unittest.main()
test/test_YoutubeDLCookieJar.py

@@ -17,10 +17,10 @@
 class TestYoutubeDLCookieJar(unittest.TestCase):
     def test_keep_session_cookies(self):
         cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
-        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        cookiejar.load()
         tf = tempfile.NamedTemporaryFile(delete=False)
         try:
-            cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
+            cookiejar.save(filename=tf.name)
             temp = tf.read().decode()
             self.assertTrue(re.search(
                 r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))

@@ -32,7 +32,7 @@ def test_keep_session_cookies(self):

     def test_strip_httponly_prefix(self):
         cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
-        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        cookiejar.load()

         def assert_cookie_has_value(key):
             self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')

@@ -42,17 +42,25 @@ def assert_cookie_has_value(key):

     def test_malformed_cookies(self):
         cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
-        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        cookiejar.load()
         # Cookies should be empty since all malformed cookie file entries
         # will be ignored
         self.assertFalse(cookiejar._cookies)

     def test_get_cookie_header(self):
         cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
-        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        cookiejar.load()
         header = cookiejar.get_cookie_header('https://www.foobar.foobar')
         self.assertIn('HTTPONLY_COOKIE', header)

+    def test_get_cookies_for_url(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+        cookiejar.load()
+        cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/')
+        self.assertEqual(len(cookies), 2)
+        cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/')
+        self.assertFalse(cookies)
+

 if __name__ == '__main__':
     unittest.main()
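The tests now call load() and save() without ignore_discard/ignore_expires, which suggests the jar defaults to keeping session and expired cookies. A minimal sketch of that pattern (an assumption about the implementation, not yt-dlp's actual class):

```python
import http.cookiejar


class KeepEverythingCookieJar(http.cookiejar.MozillaCookieJar):
    """Assumed pattern: retain session/expired cookies by default."""

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        super().load(filename, ignore_discard, ignore_expires)

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        super().save(filename, ignore_discard, ignore_expires)
```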
test/test_compat.py

@@ -9,15 +9,16 @@


 import struct
-import urllib.parse

 from yt_dlp import compat
+from yt_dlp.compat import urllib  # isort: split
 from yt_dlp.compat import (
     compat_etree_fromstring,
     compat_expanduser,
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlencode,
 )
+from yt_dlp.compat.urllib.request import getproxies


 class TestCompat(unittest.TestCase):

@@ -28,8 +29,7 @@ def test_compat_passthrough(self):
         with self.assertWarns(DeprecationWarning):
             compat.WINDOWS_VT_MODE

-        # TODO: Test submodule
-        # compat.asyncio.events  # Must not raise error
+        self.assertEqual(urllib.request.getproxies, getproxies)

         with self.assertWarns(DeprecationWarning):
             compat.compat_pycrypto_AES  # Must not raise error
test/test_download.py

@@ -10,10 +10,7 @@
 import collections
 import hashlib
-import http.client
 import json
-import socket
-import urllib.error

 from test.helper import (
     assertGreaterEqual,

@@ -29,6 +26,7 @@

 import yt_dlp.YoutubeDL  # isort: split
 from yt_dlp.extractor import get_info_extractor
+from yt_dlp.networking.exceptions import HTTPError, TransportError
 from yt_dlp.utils import (
     DownloadError,
     ExtractorError,

@@ -162,8 +160,7 @@ def try_rm_tcs_files(tcs=None):
                     force_generic_extractor=params.get('force_generic_extractor', False))
             except (DownloadError, ExtractorError) as err:
                 # Check if the exception is not a network related one
-                if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine)
-                        or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
+                if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
                     err.msg = f'{getattr(err, "msg", err)} ({tname})'
                     raise

@@ -249,7 +246,7 @@ def try_rm_tcs_files(tcs=None):
             # extractor returns full results even with extract_flat
             res_tcs = [{'info_dict': e} for e in res_dict['entries']]
             try_rm_tcs_files(res_tcs)

+        ydl.close()
     return test_template
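Rewritten as a predicate, the new skip condition reads as follows (a hypothetical helper mirroring the condition in the diff above; not part of the codebase):

```python
from yt_dlp.networking.exceptions import HTTPError, TransportError
from yt_dlp.utils import UnavailableVideoError


def is_expected_network_failure(exc):
    """Mirror the diff's condition: tolerate only transport-level errors
    and unavailable videos, while an HTTP 503 is still re-raised."""
    if not isinstance(exc, (TransportError, UnavailableVideoError)):
        return False  # genuine test failure: re-raise
    if isinstance(exc, HTTPError) and exc.status == 503:
        return False  # service unavailable is surfaced as a failure too
    return True
```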
139
test/test_downloader_external.py
Normal file
139
test/test_downloader_external.py
Normal file
|
@@ -0,0 +1,139 @@
#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import http.cookiejar

from test.helper import FakeYDL
from yt_dlp.downloader.external import (
    Aria2cFD,
    AxelFD,
    CurlFD,
    FFmpegFD,
    HttpieFD,
    WgetFD,
)

TEST_COOKIE = {
    'version': 0,
    'name': 'test',
    'value': 'ytdlp',
    'port': None,
    'port_specified': False,
    'domain': '.example.com',
    'domain_specified': True,
    'domain_initial_dot': False,
    'path': '/',
    'path_specified': True,
    'secure': False,
    'expires': None,
    'discard': False,
    'comment': None,
    'comment_url': None,
    'rest': {},
}

TEST_INFO = {'url': 'http://www.example.com/'}


class TestHttpieFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = HttpieFD(ydl, {})
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['http', '--download', '--output', 'test', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['http', '--download', '--output', 'test', 'http://www.example.com/', 'Cookie:test=ytdlp'])


class TestAxelFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = AxelFD(ydl, {})
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', '--', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', '-H', 'Cookie: test=ytdlp', '--max-redirect=0', '--', 'http://www.example.com/'])


class TestWgetFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = WgetFD(ydl, {})
            self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))


class TestCurlFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = CurlFD(ydl, {})
            self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
            self.assertIn('test=ytdlp', downloader._make_cmd('test', TEST_INFO))


class TestAria2cFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = Aria2cFD(ydl, {})
            downloader._make_cmd('test', TEST_INFO)
            self.assertFalse(hasattr(downloader, '_cookies_tempfile'))

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cmd = downloader._make_cmd('test', TEST_INFO)
            self.assertIn(f'--load-cookies={downloader._cookies_tempfile}', cmd)


@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
class TestFFmpegFD(unittest.TestCase):
    _args = []

    def _test_cmd(self, args):
        self._args = args

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = FFmpegFD(ydl, {})
            downloader._debug_cmd = self._test_cmd

            downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-i', 'http://www.example.com/',
                '-c', 'copy', '-f', 'mp4', 'file:test'])

            # Test cookies arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
                '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])

            # Test with non-url input (ffmpeg reads from stdin '-' for websockets)
            downloader._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-i', 'x', '-c', 'copy', '-f', 'mp4', 'file:test'])


if __name__ == '__main__':
    unittest.main()
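The `Cookie: test=ytdlp` value asserted throughout these tests is plain standard-library behaviour; as a minimal sketch (illustrative only, not part of the commit), the `TEST_COOKIE` fields above produce it like so:

import http.cookiejar

# Keyword names mirror TEST_COOKIE above; Cookie(**TEST_COOKIE) relies on them
cookie = http.cookiejar.Cookie(
    version=0, name='test', value='ytdlp', port=None, port_specified=False,
    domain='.example.com', domain_specified=True, domain_initial_dot=False,
    path='/', path_specified=True, secure=False, expires=None, discard=False,
    comment=None, comment_url=None, rest={})

jar = http.cookiejar.CookieJar()
jar.set_cookie(cookie)

# The header value a client would send for a matching request
assert '; '.join(f'{c.name}={c.value}' for c in jar) == 'test=ytdlp'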
@@ -16,6 +16,7 @@
from yt_dlp import YoutubeDL
from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename
from yt_dlp.utils._utils import _YDLLogger as FakeLogger

TEST_DIR = os.path.dirname(os.path.abspath(__file__))


@@ -67,17 +68,6 @@ def do_GET(self):
        assert False


class FakeLogger:
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass


class TestHttpFD(unittest.TestCase):
    def setUp(self):
        self.httpd = http.server.HTTPServer(
@@ -1,500 +0,0 @@
#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import gzip
import http.cookiejar
import http.server
import io
import pathlib
import ssl
import tempfile
import threading
import urllib.error
import urllib.request
import zlib

from test.helper import http_server_port
from yt_dlp import YoutubeDL
from yt_dlp.dependencies import brotli
from yt_dlp.utils import sanitized_Request, urlencode_postdata

from .helper import FakeYDL

TEST_DIR = os.path.dirname(os.path.abspath(__file__))


class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    protocol_version = 'HTTP/1.1'

    def log_message(self, format, *args):
        pass

    def _headers(self):
        payload = str(self.headers).encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _redirect(self):
        self.send_response(int(self.path[len('/redirect_'):]))
        self.send_header('Location', '/method')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _method(self, method, payload=None):
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self.send_response(int(status))
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _read_data(self):
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))

    def do_POST(self):
        data = self._read_data()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        if self.path == '/video.html':
            payload = b'<html><video src="/vid.mp4" /></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))  # required for persistent connections
            self.end_headers()
            self.wfile.write(payload)
        elif self.path == '/vid.mp4':
            payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
            self.send_response(200)
            self.send_header('Content-Type', 'video/mp4')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path == '/%E4%B8%AD%E6%96%87.html':
            payload = b'<html><video src="/vid.mp4" /></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path == '/%c7%9f':
            payload = b'<html><video src="/vid.mp4" /></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET')
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path == '/trailing_garbage':
            payload = b'<html><video src="/vid.mp4" /></html>'
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.send_header('Content-Encoding', 'gzip')
            buf = io.BytesIO()
            with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                f.write(payload)
            compressed = buf.getvalue() + b'trailing garbage'
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif self.path == '/302-non-ascii-redirect':
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self.send_response(301)
            self.send_header('Location', new_url)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif self.path == '/content-encoding':
            encodings = self.headers.get('ytdl-encoding', '')
            payload = b'<html><video src="/vid.mp4" /></html>'
            for encoding in filter(None, (e.strip() for e in encodings.split(','))):
                if encoding == 'br' and brotli:
                    payload = brotli.compress(payload)
                elif encoding == 'gzip':
                    buf = io.BytesIO()
                    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
                        f.write(payload)
                    payload = buf.getvalue()
                elif encoding == 'deflate':
                    payload = zlib.compress(payload)
                elif encoding == 'unsupported':
                    payload = b'raw'
                    break
                else:
                    self._status(415)
                    return
            self.send_response(200)
            self.send_header('Content-Encoding', encodings)
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())


class FakeLogger:
    def debug(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        pass


class TestHTTP(unittest.TestCase):
    def setUp(self):
        # HTTP server
        self.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.http_port = http_server_port(self.http_httpd)
        self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        self.http_server_thread.daemon = True
        self.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        self.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        self.https_httpd.socket = sslctx.wrap_socket(self.https_httpd.socket, server_side=True)
        self.https_port = http_server_port(self.https_httpd)
        self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
        self.https_server_thread.daemon = True
        self.https_server_thread.start()

    def test_nocheckcertificate(self):
        with FakeYDL({'logger': FakeLogger()}) as ydl:
            with self.assertRaises(urllib.error.URLError):
                ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers'))

        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
            r = ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers'))
            self.assertEqual(r.status, 200)
            r.close()

    def test_percent_encode(self):
        with FakeYDL() as ydl:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            self.assertEqual(res.status, 200)
            res.close()
            # don't normalize existing percent encodings
            res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            self.assertEqual(res.status, 200)
            res.close()

    def test_unicode_path_redirection(self):
        with FakeYDL() as ydl:
            r = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            self.assertEqual(r.url, f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html')
            r.close()

    def test_redirect(self):
        with FakeYDL() as ydl:
            def do_req(redirect_status, method):
                data = b'testdata' if method in ('POST', 'PUT') else None
                res = ydl.urlopen(sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
                return res.read().decode('utf-8'), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
            self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))

            self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))

            # 301 and 302 turn POST only into a GET
            self.assertEqual(do_req(301, 'POST'), ('', 'GET'))
            self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
            self.assertEqual(do_req(302, 'POST'), ('', 'GET'))
            self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))

            self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
            self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                self.assertEqual(do_req(307, m), ('testdata', m))
                self.assertEqual(do_req(308, m), ('testdata', m))

            self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
            self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with self.assertRaises(urllib.error.HTTPError):
                    do_req(code, 'GET')

    def test_content_type(self):
        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
        with FakeYDL({'nocheckcertificate': True}) as ydl:
            # method should be auto-detected as POST
            r = sanitized_Request(f'https://localhost:{self.https_port}/headers', data=urlencode_postdata({'test': 'test'}))

            headers = ydl.urlopen(r).read().decode('utf-8')
            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)

            # test http
            r = sanitized_Request(f'http://localhost:{self.http_port}/headers', data=urlencode_postdata({'test': 'test'}))
            headers = ydl.urlopen(r).read().decode('utf-8')
            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)

    def test_cookiejar(self):
        with FakeYDL() as ydl:
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(
                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
                False, '/headers', True, False, None, False, None, None, {}))
            data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            self.assertIn(b'Cookie: test=ytdlp', data)

    def test_no_compression_compat_header(self):
        with FakeYDL() as ydl:
            data = ydl.urlopen(
                sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/headers',
                    headers={'Youtubedl-no-compression': True})).read()
            self.assertIn(b'Accept-Encoding: identity', data)
            self.assertNotIn(b'youtubedl-no-compression', data.lower())

    def test_gzip_trailing_garbage(self):
        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
        with FakeYDL() as ydl:
            data = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode('utf-8')
            self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')

    @unittest.skipUnless(brotli, 'brotli support is not installed')
    def test_brotli(self):
        with FakeYDL() as ydl:
            res = ydl.urlopen(
                sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'br'}))
            self.assertEqual(res.headers.get('Content-Encoding'), 'br')
            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_deflate(self):
        with FakeYDL() as ydl:
            res = ydl.urlopen(
                sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'deflate'}))
            self.assertEqual(res.headers.get('Content-Encoding'), 'deflate')
            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_gzip(self):
        with FakeYDL() as ydl:
            res = ydl.urlopen(
                sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'gzip'}))
            self.assertEqual(res.headers.get('Content-Encoding'), 'gzip')
            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_multiple_encodings(self):
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
        with FakeYDL() as ydl:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = ydl.urlopen(
                    sanitized_Request(
                        f'http://127.0.0.1:{self.http_port}/content-encoding',
                        headers={'ytdl-encoding': pair}))
                self.assertEqual(res.headers.get('Content-Encoding'), pair)
                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_unsupported_encoding(self):
        # it should return the raw content
        with FakeYDL() as ydl:
            res = ydl.urlopen(
                sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/content-encoding',
                    headers={'ytdl-encoding': 'unsupported'}))
            self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
            self.assertEqual(res.read(), b'raw')


class TestClientCert(unittest.TestCase):
    def setUp(self):
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        cacertfn = os.path.join(self.certdir, 'ca.crt')
        self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.verify_mode = ssl.CERT_REQUIRED
        sslctx.load_verify_locations(cafile=cacertfn)
        sslctx.load_cert_chain(certfn, None)
        self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def _run_test(self, **params):
        ydl = YoutubeDL({
            'logger': FakeLogger(),
            # Disable client-side validation of unacceptable self-signed testcert.pem
            # The test is of a check on the server side, so unaffected
            'nocheckcertificate': True,
            **params,
        })
        r = ydl.extract_info(f'https://127.0.0.1:{self.port}/video.html')
        self.assertEqual(r['url'], f'https://127.0.0.1:{self.port}/vid.mp4')

    def test_certificate_combined_nopass(self):
        self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt'))

    def test_certificate_nocombined_nopass(self):
        self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'),
                       client_certificate_key=os.path.join(self.certdir, 'client.key'))

    def test_certificate_combined_pass(self):
        self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
                       client_certificate_password='foobar')

    def test_certificate_nocombined_pass(self):
        self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'),
                       client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'),
                       client_certificate_password='foobar')


def _build_proxy_handler(name):
    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
        proxy_name = name

        def log_message(self, format, *args):
            pass

        def do_GET(self):
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
    return HTTPTestRequestHandler


class TestProxy(unittest.TestCase):
    def setUp(self):
        self.proxy = http.server.HTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        self.port = http_server_port(self.proxy)
        self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
        self.proxy_thread.daemon = True
        self.proxy_thread.start()

        self.geo_proxy = http.server.HTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        self.geo_port = http_server_port(self.geo_proxy)
        self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
        self.geo_proxy_thread.daemon = True
        self.geo_proxy_thread.start()

    def test_proxy(self):
        geo_proxy = f'127.0.0.1:{self.geo_port}'
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
            'geo_verification_proxy': geo_proxy,
        })
        url = 'http://foo.com/bar'
        response = ydl.urlopen(url).read().decode()
        self.assertEqual(response, f'normal: {url}')

        req = urllib.request.Request(url)
        req.add_header('Ytdl-request-proxy', geo_proxy)
        response = ydl.urlopen(req).read().decode()
        self.assertEqual(response, f'geo: {url}')

    def test_proxy_with_idn(self):
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
        })
        url = 'http://中文.tw/'
        response = ydl.urlopen(url).read().decode()
        # b'xn--fiq228c' is '中文'.encode('idna')
        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')


class TestFileURL(unittest.TestCase):
    # See https://github.com/ytdl-org/youtube-dl/issues/8227
    def test_file_urls(self):
        tf = tempfile.NamedTemporaryFile(delete=False)
        tf.write(b'foobar')
        tf.close()
        url = pathlib.Path(tf.name).as_uri()
        with FakeYDL() as ydl:
            self.assertRaisesRegex(
                urllib.error.URLError, 'file:// URLs are explicitly disabled in yt-dlp for security reasons', ydl.urlopen, url)
        with FakeYDL({'enable_file_urls': True}) as ydl:
            res = ydl.urlopen(url)
            self.assertEqual(res.read(), b'foobar')
            res.close()
        os.unlink(tf.name)


if __name__ == '__main__':
    unittest.main()
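Before the replacement tests below, note that the redirect behaviour `test_redirect` pinned down is the standard method-rewriting matrix: 303 downgrades everything except HEAD to GET, 301/302 downgrade only POST, and 307/308 never change the method. As a plain-Python sketch of that rule (illustrative only, not the yt-dlp implementation):

def redirect_method(method, status):
    # 303 See Other: re-request with GET (HEAD stays HEAD)
    if status == 303 and method != 'HEAD':
        return 'GET'
    # Historical browser behaviour: 301/302 turn POST into GET, keep PUT etc.
    if status in (301, 302) and method == 'POST':
        return 'GET'
    # 307/308 (and everything else) preserve the original method
    return method

assert redirect_method('PUT', 303) == 'GET'
assert redirect_method('PUT', 302) == 'PUT'
assert redirect_method('POST', 301) == 'GET'
assert redirect_method('POST', 308) == 'POST'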
1382 test/test_networking.py Normal file
(File diff suppressed because it is too large)

279 test/test_networking_utils.py Normal file
@@ -0,0 +1,279 @@
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import io
import platform
import random
import ssl
import urllib.error
import warnings

from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import certifi
from yt_dlp.networking import Response
from yt_dlp.networking._helper import (
    InstanceStoreMixin,
    add_accept_encoding_header,
    get_redirect_method,
    make_socks_proxy_opts,
    select_proxy,
    ssl_load_certs,
)
from yt_dlp.networking.exceptions import (
    HTTPError,
    IncompleteRead,
    _CompatHTTPError,
)
from yt_dlp.socks import ProxyType
from yt_dlp.utils.networking import HTTPHeaderDict

TEST_DIR = os.path.dirname(os.path.abspath(__file__))


class TestNetworkingUtils:

    def test_select_proxy(self):
        proxies = {
            'all': 'socks5://example.com',
            'http': 'http://example.com:1080',
            'no': 'bypass.example.com,yt-dl.org'
        }

        assert select_proxy('https://example.com', proxies) == proxies['all']
        assert select_proxy('http://example.com', proxies) == proxies['http']
        assert select_proxy('http://bypass.example.com', proxies) is None
        assert select_proxy('https://yt-dl.org', proxies) is None

    @pytest.mark.parametrize('socks_proxy,expected', [
        ('socks5h://example.com', {
            'proxytype': ProxyType.SOCKS5,
            'addr': 'example.com',
            'port': 1080,
            'rdns': True,
            'username': None,
            'password': None
        }),
        ('socks5://user:@example.com:5555', {
            'proxytype': ProxyType.SOCKS5,
            'addr': 'example.com',
            'port': 5555,
            'rdns': False,
            'username': 'user',
            'password': ''
        }),
        ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', {
            'proxytype': ProxyType.SOCKS4,
            'addr': '127.0.0.1',
            'port': 1080,
            'rdns': False,
            'username': 'u@ser',
            'password': 'pa ss'
        }),
        ('socks4a://:pa%20ss@127.0.0.1', {
            'proxytype': ProxyType.SOCKS4A,
            'addr': '127.0.0.1',
            'port': 1080,
            'rdns': True,
            'username': '',
            'password': 'pa ss'
        })
    ])
    def test_make_socks_proxy_opts(self, socks_proxy, expected):
        assert make_socks_proxy_opts(socks_proxy) == expected

    def test_make_socks_proxy_unknown(self):
        with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
            make_socks_proxy_opts('socks://127.0.0.1')

    @pytest.mark.skipif(not certifi, reason='certifi is not installed')
    def test_load_certifi(self):
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        context2 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(context, use_certifi=True)
        context2.load_verify_locations(cafile=certifi.where())
        assert context.get_ca_certs() == context2.get_ca_certs()

        # Test load normal certs
        # XXX: could there be a case where system certs are the same as certifi?
        context3 = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(context3, use_certifi=False)
        assert context3.get_ca_certs() != context.get_ca_certs()

    @pytest.mark.parametrize('method,status,expected', [
        ('GET', 303, 'GET'),
        ('HEAD', 303, 'HEAD'),
        ('PUT', 303, 'GET'),
        ('POST', 301, 'GET'),
        ('HEAD', 301, 'HEAD'),
        ('POST', 302, 'GET'),
        ('HEAD', 302, 'HEAD'),
        ('PUT', 302, 'PUT'),
        ('POST', 308, 'POST'),
        ('POST', 307, 'POST'),
        ('HEAD', 308, 'HEAD'),
        ('HEAD', 307, 'HEAD'),
    ])
    def test_get_redirect_method(self, method, status, expected):
        assert get_redirect_method(method, status) == expected

    @pytest.mark.parametrize('headers,supported_encodings,expected', [
        ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
        ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
        ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
    ])
    def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
        headers = HTTPHeaderDict(headers)
        add_accept_encoding_header(headers, supported_encodings)
        assert headers == HTTPHeaderDict(expected)


class TestInstanceStoreMixin:

    class FakeInstanceStoreMixin(InstanceStoreMixin):
        def _create_instance(self, **kwargs):
            return random.randint(0, 1000000)

        def _close_instance(self, instance):
            pass

    def test_mixin(self):
        mixin = self.FakeInstanceStoreMixin()
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}})

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])

        cookiejar = YoutubeDLCookieJar()
        assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)

        assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())

        # Different order
        assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)

        m = mixin._get_instance(t=1234)
        assert mixin._get_instance(t=1234) == m
        mixin._clear_instances()
        assert mixin._get_instance(t=1234) != m


class TestNetworkingExceptions:

    @staticmethod
    def create_response(status):
        return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
    def test_http_error(self, http_error_class):

        response = self.create_response(403)
        error = http_error_class(response)

        assert error.status == 403
        assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
        assert error.reason == response.reason
        assert error.response is response

        data = error.response.read()
        assert data == b'test'
        assert repr(error) == '<HTTPError 403: Forbidden>'

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
    def test_redirect_http_error(self, http_error_class):
        response = self.create_response(301)
        error = http_error_class(response, redirect_loop=True)
        assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
        assert error.reason == 'Moved Permanently'

    def test_compat_http_error(self):
        response = self.create_response(403)
        error = _CompatHTTPError(HTTPError(response))
        assert isinstance(error, HTTPError)
        assert isinstance(error, urllib.error.HTTPError)

        @contextlib.contextmanager
        def raises_deprecation_warning():
            with warnings.catch_warnings(record=True) as w:
                warnings.simplefilter('always')
                yield

                if len(w) == 0:
                    pytest.fail('Did not raise DeprecationWarning')
                if len(w) > 1:
                    pytest.fail(f'Raised multiple warnings: {w}')

                if not issubclass(w[-1].category, DeprecationWarning):
                    pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
                w.clear()

        with raises_deprecation_warning():
            assert error.code == 403

        with raises_deprecation_warning():
            assert error.getcode() == 403

        with raises_deprecation_warning():
            assert error.hdrs is error.response.headers

        with raises_deprecation_warning():
            assert error.info() is error.response.headers

        with raises_deprecation_warning():
            assert error.headers is error.response.headers

        with raises_deprecation_warning():
            assert error.filename == error.response.url

        with raises_deprecation_warning():
            assert error.url == error.response.url

        with raises_deprecation_warning():
            assert error.geturl() == error.response.url

        # Passthrough file operations
        with raises_deprecation_warning():
            assert error.read() == b'test'

        with raises_deprecation_warning():
            assert not error.closed

        with raises_deprecation_warning():
            # Technically Response operations are also passed through, which should not be used.
            assert error.get_header('test') == 'test'

        # Should not raise a warning
        error.close()

    @pytest.mark.skipif(
        platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
    def test_compat_http_error_autoclose(self):
        # Compat HTTPError should not autoclose response
        response = self.create_response(403)
        _CompatHTTPError(HTTPError(response))
        assert not response.closed

    def test_incomplete_read_error(self):
        error = IncompleteRead(b'test', 3, cause='test')
        assert isinstance(error, IncompleteRead)
        assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
        assert str(error) == error.msg == '4 bytes read, 3 more expected'
        assert error.partial == b'test'
        assert error.expected == 3
        assert error.cause == 'test'

        error = IncompleteRead(b'aaa')
        assert repr(error) == '<IncompleteRead: 3 bytes read>'
        assert str(error) == '3 bytes read'
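The SOCKS URLs in `test_make_socks_proxy_opts` follow ordinary URL syntax; `socks4a`/`socks5h` differ from `socks4`/`socks5` only in delegating DNS resolution to the proxy (the `rdns` flag). A stdlib-only sketch of the same parsing, assuming the conventional default port 1080 (this is not the yt-dlp implementation):

import urllib.parse

def parse_socks_url(url):
    parts = urllib.parse.urlsplit(url)
    return {
        'scheme': parts.scheme,
        'addr': parts.hostname,
        'port': parts.port or 1080,  # 1080 is the conventional SOCKS port
        # socks4a/socks5h delegate hostname resolution to the proxy
        'rdns': parts.scheme in ('socks4a', 'socks5h'),
        # Percent-encoded credentials are decoded, matching the test data
        'username': urllib.parse.unquote(parts.username) if parts.username is not None else None,
        'password': urllib.parse.unquote(parts.password) if parts.password is not None else None,
    }

opts = parse_socks_url('socks4://u%40ser:pa%20ss@127.0.0.1:1080')
assert opts['username'] == 'u@ser' and opts['password'] == 'pa ss'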
@@ -51,6 +51,7 @@
    escape_url,
    expand_path,
    extract_attributes,
    extract_basic_auth,
    find_xpath_attr,
    fix_xml_ampersands,
    float_or_none,

@@ -103,7 +104,6 @@
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    shell_quote,
    smuggle_url,
    str_or_none,

@@ -132,6 +132,7 @@
    xpath_text,
    xpath_with_ns,
)
from yt_dlp.utils.networking import HTTPHeaderDict


class TestUtil(unittest.TestCase):

@@ -258,15 +259,6 @@ def test_sanitize_url(self):
        self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
        self.assertEqual(sanitize_url('foo bar'), 'foo bar')

    def test_extract_basic_auth(self):
        auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
        self.assertFalse(auth_header('http://foo.bar'))
        self.assertFalse(auth_header('http://:foo.bar'))
        self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
        self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
        self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
        self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')

    def test_expand_path(self):
        def env(var):
            return f'%{var}%' if sys.platform == 'win32' else f'${var}'

@@ -668,6 +660,8 @@ def test_parse_duration(self):
        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
        self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
        self.assertEqual(parse_duration('103:050'), 103.05)
        self.assertEqual(parse_duration('1HR 3MIN'), 3780)
        self.assertEqual(parse_duration('2hrs 3mins'), 7380)

    def test_fix_xml_ampersands(self):
        self.assertEqual(

@@ -1840,6 +1834,8 @@ def test_iri_to_uri(self):
    def test_clean_podcast_url(self):
        self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
        self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
        self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
        self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')

    def test_LazyList(self):
        it = list(range(10))

@@ -2327,6 +2323,44 @@ def test_traverse_obj(self):
        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
                         msg='function on a `re.Match` should give group name as well')

    def test_http_header_dict(self):
        headers = HTTPHeaderDict()
        headers['ytdl-test'] = 1
        self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')])
        headers['Ytdl-test'] = '2'
        self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')])
        self.assertTrue('ytDl-Test' in headers)
        self.assertEqual(str(headers), str(dict(headers)))
        self.assertEqual(repr(headers), str(dict(headers)))

        headers.update({'X-dlp': 'data'})
        self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')})
        self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
        self.assertEqual(len(headers), 2)
        self.assertEqual(headers.copy(), headers)
        headers2 = HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'})
        self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
        self.assertEqual(len(headers2), 2)
        headers2.clear()
        self.assertEqual(len(headers2), 0)

        # ensure we prefer latter headers
        headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
        self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')})
        del headers3['ytdl-tesT']
        self.assertEqual(dict(headers3), {})

        headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
        self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})

    def test_extract_basic_auth(self):
        assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
        assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
        assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==')
        assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=')
        assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')


if __name__ == '__main__':
    unittest.main()
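The expected values in both `test_extract_basic_auth` variants are ordinary RFC 7617 Basic credentials, i.e. base64 of `user:pass`; a quick stdlib check (illustrative only):

import base64

def basic_auth(userinfo):
    # RFC 7617: the 'user:pass' string is base64-encoded as-is
    return 'Basic ' + base64.b64encode(userinfo.encode()).decode()

assert basic_auth(':') == 'Basic Og=='
assert basic_auth(':pass') == 'Basic OnBhc3M='
assert basic_auth('user:') == 'Basic dXNlcjo='
assert basic_auth('user:pass') == 'Basic dXNlcjpwYXNz'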
@@ -1,9 +1,10 @@
import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
import json

@@ -23,9 +24,9 @@
import unicodedata

from .cache import Cache
from .compat import urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import (
    DashSegmentsFD,
    FFmpegFD,

@@ -37,6 +38,16 @@
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (

@@ -75,13 +86,11 @@
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,

@@ -90,9 +99,6 @@
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,

@@ -105,6 +111,7 @@
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,

@@ -120,9 +127,6 @@
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,

@@ -135,8 +139,6 @@
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,

@@ -154,6 +156,13 @@
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':

@@ -568,7 +577,7 @@ class YoutubeDL:
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }

@@ -617,7 +626,8 @@ def __init__(self, params=None, auto_init=True):

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

@@ -675,6 +685,13 @@ def process_color_policy(stream):
                raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self.__header_cookies = []
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

@@ -745,9 +762,6 @@ def check_deprecated(param, option, suggestion):
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,

@@ -764,8 +778,6 @@ def check_deprecated(param, option, suggestion):
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()

@@ -941,11 +953,17 @@ def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

@@ -988,6 +1006,7 @@ def trouble(self, message=None, tb=None, is_error=True):
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

@@ -1276,28 +1295,27 @@ def create_key(outer_mobj):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:

@@ -1327,7 +1345,7 @@ def create_key(outer_mobj):
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]

@@ -1346,7 +1364,7 @@ def create_key(outer_mobj):
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(initial_field, value)
                value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value

@@ -1481,7 +1499,10 @@ def check_filter():
                return ret

            if self.in_download_archive(info_dict):
                reason = '%s has already been recorded in the archive' % video_title
                reason = ''.join((
                    format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
                    format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
                    'has already been recorded in the archive'))
                break_opt, break_err = 'break_on_existing', ExistingVideoReached
            else:
                try:

@@ -1542,7 +1563,8 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
                self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
                self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                               'has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break

@@ -1630,8 +1652,67 @@ def progress(msg):
                self.to_screen('')
            raise

    def _load_cookies(self, data, *, autoscope=True):
        """Loads cookies from a `Cookie` header

        This tries to work around the security vulnerability of passing cookies to every domain.
        See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

        @param data         The Cookie header as string to load the cookies from
        @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                            If `True`, save cookies for later to be stored in the jar with a limited scope
                            If a URL, save cookies in the jar with the domain of the URL
        """
        for cookie in LenientSimpleCookie(data).values():
            if autoscope and any(cookie.values()):
                raise ValueError('Invalid syntax in Cookie Header')

            domain = cookie.get('domain') or ''
            expiry = cookie.get('expires')
            if expiry == '':  # 0 is valid
                expiry = None
            prepared_cookie = http.cookiejar.Cookie(
                cookie.get('version') or 0, cookie.key, cookie.value, None, False,
                domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
                cookie.get('secure') or False, expiry, False, None, None, {})

            if domain:
                self.cookiejar.set_cookie(prepared_cookie)
            elif autoscope is True:
                self.deprecated_feature(
                    'Passing cookies as a header is a potential security risk; '
                    'they will be scoped to the domain of the downloaded urls. '
                    'Please consider loading cookies from a file or browser instead.')
                self.__header_cookies.append(prepared_cookie)
            elif autoscope:
                self.report_warning(
                    'The extractor result contains an unscoped cookie as an HTTP header. '
                    f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                    only_once=True)
                self._apply_header_cookies(autoscope, [prepared_cookie])
            else:
                self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                                  tb=False, is_error=False)

    def _apply_header_cookies(self, url, cookies=None):
        """Applies stray header cookies to the provided url

        This loads header cookies and scopes them to the domain provided in `url`.
        While this is not ideal, it helps reduce the risk of them being sent
        to an unintended destination while mostly maintaining compatibility.
        """
        parsed = urllib.parse.urlparse(url)
        if not parsed.hostname:
            return

        for cookie in map(copy.copy, cookies or self.__header_cookies):
            cookie.domain = f'.{parsed.hostname}'
            self.cookiejar.set_cookie(cookie)

    @_handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        self._apply_header_cookies(url)

        try:
            ie_result = ie.extract(url)
        except UserNotLive as e:
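The `autoscope` validation above leans on `SimpleCookie` morsel semantics: a plain `Cookie` header produces morsels with no attributes set, while Set-Cookie-style attributes (`Domain=`, `Path=`) populate them and so trip `any(cookie.values())`. A sketch with the stdlib `http.cookies.SimpleCookie` (yt-dlp's `LenientSimpleCookie` is a more forgiving subclass of it), for illustration:

from http.cookies import SimpleCookie

# Plain Cookie header syntax: name=value pairs, no attributes
plain = SimpleCookie('test=ytdlp; other=1')
assert [(m.key, m.value) for m in plain.values()] == [('test', 'ytdlp'), ('other', '1')]
assert not any(plain['test'].values())  # no Domain/Path/etc. set

# Set-Cookie syntax smuggled into a Cookie header carries attributes,
# which _load_cookies(..., autoscope=True) rejects as invalid
scoped = SimpleCookie('test=ytdlp; Domain=.example.com; Path=/')
assert scoped['test']['domain'] == '.example.com'
assert any(scoped['test'].values())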
@@ -2091,8 +2172,6 @@ def syntax_error(note, start):
        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats') == 'selected'

        def _parse_filter(tokens):
            filter_parts = []
            for type, string_, start, _, _ in tokens:

@@ -2265,10 +2344,19 @@ def _merge(formats_pair):
            return new_dict

        def _check_formats(formats):
            if not check_formats:
            if (self.params.get('check_formats') is not None
                    or self.params.get('allow_unplayable_formats')):
                yield from formats
                return
            elif self.params.get('check_formats') == 'selected':
                yield from self._check_formats(formats)
                return

            for f in formats:
                if f.get('has_drm'):
                    yield from self._check_formats([f])
                else:
                    yield f

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,

@@ -2407,14 +2495,34 @@ def restore_last_token(self):
        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)

    def _calc_headers(self, info_dict):
        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
        if 'Youtubedl-No-Compression' in res:  # deprecated
            res.pop('Youtubedl-No-Compression', None)
            res['Accept-Encoding'] = 'identity'
        cookies = self.cookiejar.get_cookie_header(info_dict['url'])
    def _calc_headers(self, info_dict, load_cookies=False):
        res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
        clean_headers(res)

        if load_cookies:  # For --load-info-json
            self._load_cookies(res.get('Cookie'), autoscope=info_dict['url'])  # compat
            self._load_cookies(info_dict.get('cookies'), autoscope=False)
        # The `Cookie` header is removed to prevent leaks and unscoped cookies.
        # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
        res.pop('Cookie', None)
        cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
        if cookies:
            res['Cookie'] = cookies
            encoder = LenientSimpleCookie()
            values = []
            for cookie in cookies:
                _, value = encoder.value_encode(cookie.value)
                values.append(f'{cookie.name}={value}')
                if cookie.domain:
                    values.append(f'Domain={cookie.domain}')
                if cookie.path:
                    values.append(f'Path={cookie.path}')
                if cookie.secure:
                    values.append('Secure')
                if cookie.expires:
                    values.append(f'Expires={cookie.expires}')
                if cookie.version:
                    values.append(f'Version={cookie.version}')
            info_dict['cookies'] = '; '.join(values)

        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
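The merge on the first line of the new `_calc_headers` relies on `HTTPHeaderDict` semantics pinned down in `test_http_header_dict` (test/test_utils.py): keys are case-insensitive and normalized, and later sources win. A small illustration with assumed header values:

from yt_dlp.utils.networking import HTTPHeaderDict

base = {'User-Agent': 'default-UA', 'X-Forwarded-For': '1.2.3.4'}  # e.g. params['http_headers']
per_info = {'user-agent': 'extractor-UA'}                          # e.g. info_dict['http_headers']

merged = HTTPHeaderDict(base, per_info)
assert merged['USER-AGENT'] == 'extractor-UA'  # case-insensitive lookup, later source wins
assert dict(merged) == {'User-Agent': 'extractor-UA', 'X-Forwarded-For': '1.2.3.4'}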
@@ -2620,10 +2728,10 @@ def sanitize_numeric_fields(info):
        if field_preference:
            info_dict['_format_sort_fields'] = field_preference

        # or None ensures --clean-infojson removes it
        info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
        info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
            f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]
            formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

        if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
            self.report_warning(
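`has_drm` is now effectively tri-state: falsy, True, or the string 'maybe'. A minimal sketch of the resulting semantics, using made-up format dicts:

    formats = [
        {'format_id': 'a', 'has_drm': True},     # always dropped
        {'format_id': 'b', 'has_drm': 'maybe'},  # kept, to be probed by _check_formats
        {'format_id': 'c'},                      # kept
    ]

    playable = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
    assert [f['format_id'] for f in playable] == ['b', 'c']

    # _has_drm is only set when DRM is certain, so 'maybe' alone does not trigger it
    has_drm = any(f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    assert has_drm is True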
@@ -2675,7 +2783,12 @@ def is_wellformed(f):
                    and info_dict.get('duration') and format.get('tbr')
                    and not format.get('filesize') and not format.get('filesize_approx')):
                format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
            format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

        # Safeguard against old/insecure infojson when using --load-info-json
        if info_dict.get('http_headers'):
            info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
            info_dict['http_headers'].pop('Cookie', None)

        # This is copied to http_headers by the above _calc_headers and can now be removed
        if '__x_forwarded_for_ip' in info_dict:
@@ -2772,11 +2885,8 @@ def is_wellformed(f):
            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
                'incomplete_formats': (
                    # All formats are video-only or
                    all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                    # all formats are audio-only
                    or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
                'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                       or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
            }))
            if interactive_format_selection and not formats_to_download:
                self.report_error('Requested format is not available', tb=False, is_error=False)
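The rewritten `incomplete_formats` predicate is slightly broader than the old one. A sketch showing one case where they differ (the format dicts are made up):

    def incomplete_old(formats):
        return (all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    def incomplete_new(formats):
        return (all(f.get('vcodec') == 'none' for f in formats)      # no format has video
                or all(f.get('acodec') == 'none' for f in formats))  # no format has audio

    # An audio-only format next to an entry with neither audio nor video:
    formats = [{'vcodec': 'none', 'acodec': 'mp4a.40.2'}, {'vcodec': 'none', 'acodec': 'none'}]
    assert not incomplete_old(formats)  # old: the second entry is not audio-only
    assert incomplete_new(formats)      # new: still nothing with video, so incomplete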
@@ -3186,8 +3296,9 @@ def existing_video_file(*filepaths):
            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if not (fd is FFmpegFD or fd is DashSegmentsFD) and 'no-direct-merge' not in self.params['compat_opts'] and (info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                if fd not in [FFmpegFD, DashSegmentsFD] and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = (f'This format cannot be partially downloaded {fd}' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return
@@ -3346,7 +3457,7 @@ def ffmpeg_fixup(cndn, msg, cls):
            postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                isinstance(pp, FFmpegVideoConvertorPP)
                and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
            ) for pp in self._pps['post_process'])
            ) for pp in self._pps['post_process']) or fd == FFmpegFD

            if not postprocessed_by_ffmpeg:
                ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
@@ -3692,7 +3803,7 @@ def render_formats_table(self, info_dict):

        def simplified_codec(f, field):
            assert field in ('acodec', 'vcodec')
            codec = f.get(field, 'unknown')
            codec = f.get(field)
            if not codec:
                return 'unknown'
            elif codec != 'none':
@@ -3727,14 +3838,13 @@ def simplified_codec(f, field):
                simplified_codec(f, 'acodec'),
                format_field(f, 'abr', '\t%dk', func=round),
                format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
                join_nonempty(
                    self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                    self._format_out('DRM', 'light red') if f.get('has_drm') else None,
                    format_field(f, 'language', '[%s]'),
                    join_nonempty(format_field(f, 'format_note'),
                join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                    self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                    (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                        else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                    format_field(f, 'format_note'),
                    format_field(f, 'container', ignore=(None, f.get('ext'))),
                    delim=', '),
                    delim=' '),
                    delim=', '), delim=' '),
            ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
@@ -3783,12 +3893,6 @@ def list_thumbnails(self, info_dict):
    def list_subtitles(self, video_id, subtitles, name='subtitles'):
        self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)

    def urlopen(self, req):
        """ Start an HTTP download """
        if isinstance(req, str):
            req = sanitized_Request(req)
        return self._opener.open(req, timeout=self._socket_timeout)

    def print_debug_header(self):
        if not self.params.get('verbose'):
            return
@@ -3877,13 +3981,8 @@ def get_encoding(stream):
            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
        })) or 'none'))

        self._setup_opener()
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        write_debug(f'Proxy map: {proxy_map}')

        write_debug(f'Proxy map: {self.proxies}')
        # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
        for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
            display_list = ['%s%s' % (
                klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -3911,58 +4010,109 @@ def get_encoding(stream):
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

    def _setup_opener(self):
        if hasattr(self, '_opener'):
            return
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
    @functools.cached_property
    def proxies(self):
        """Global proxy configuration"""
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
                opts_proxy = '__noproxy__'
            proxies = {'all': opts_proxy}
        else:
            proxies = urllib.request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            # compat. Set HTTPS_PROXY to __noproxy__ to revert
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = urllib.request.DataHandler()
        return proxies

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = urllib.request.FileHandler()
    @functools.cached_property
    def cookiejar(self):
        """Global cookiejar instance"""
        return load_cookies(
            self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)

        if not self.params.get('enable_file_urls'):
            def file_open(*args, **kwargs):
                raise urllib.error.URLError(
                    'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.')
            file_handler.file_open = file_open
    @property
    def _opener(self):
        """
        Get a urllib OpenerDirector from the Urllib handler (deprecated).
        """
        self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
        handler = self._request_director.handlers['Urllib']
        return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)

        opener = urllib.request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
    def urlopen(self, req):
        """ Start an HTTP download """
        if isinstance(req, str):
            req = Request(req)
        elif isinstance(req, urllib.request.Request):
            self.deprecation_warning(
                'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
                'Use yt_dlp.networking.common.Request instead.')
            req = urllib_req_to_req(req)
        assert isinstance(req, Request)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
        # compat: Assume user:pass url params are basic auth
        url, basic_auth_header = extract_basic_auth(req.url)
        if basic_auth_header:
            req.headers['Authorization'] = basic_auth_header
        req.url = sanitize_url(url)

        clean_proxies(proxies=req.proxies, headers=req.headers)
        clean_headers(req.headers)

        try:
            return self._request_director.send(req)
        except NoSupportingHandlers as e:
            for ue in e.unsupported_errors:
                if not (ue.handler and ue.msg):
                    continue
                if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                    raise RequestError(
                        'file:// URLs are disabled by default in yt-dlp for security reasons. '
                        'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            raise
        except SSLError as e:
            if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
                raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
            elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
                raise RequestError(
                    'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                    'Try using --legacy-server-connect', cause=e) from e
            raise
        except HTTPError as e:  # TODO: Remove in a future release
            raise _CompatHTTPError(e) from e

    def build_request_director(self, handlers):
        logger = _YDLLogger(self)
        headers = self.params.get('http_headers').copy()
        proxies = self.proxies.copy()
        clean_headers(headers)
        clean_proxies(proxies, headers)

        director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
        for handler in handlers:
            director.add_handler(handler(
                logger=logger,
                headers=headers,
                cookiejar=self.cookiejar,
                proxies=proxies,
                prefer_system_certs='no-certifi' in self.params['compat_opts'],
                verify=not self.params.get('nocheckcertificate'),
                **traverse_obj(self.params, {
                    'verbose': 'debug_printtraffic',
                    'source_address': 'source_address',
                    'timeout': 'socket_timeout',
                    'legacy_ssl_support': 'legacyserverconnect',
                    'enable_file_urls': 'enable_file_urls',
                    'client_cert': {
                        'client_certificate': 'client_certificate',
                        'client_certificate_key': 'client_certificate_key',
                        'client_certificate_password': 'client_certificate_password',
                    },
                }),
            ))
        return director

    def encode(self, s):
        if isinstance(s, bytes):
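The "user:pass url params are basic auth" compat step above can be sketched standalone. `extract_basic_auth` here is a hypothetical re-implementation for illustration; yt-dlp's own helper may differ in detail:

    import base64
    import urllib.parse

    def extract_basic_auth(url):
        # Split credentials out of the URL and turn them into an
        # Authorization header, mirroring the compat behaviour above.
        parts = urllib.parse.urlsplit(url)
        if parts.username is None:
            return url, None
        auth = f'{parts.username}:{parts.password or ""}'
        header = 'Basic ' + base64.b64encode(auth.encode()).decode()
        netloc = parts.hostname + (f':{parts.port}' if parts.port else '')
        return urllib.parse.urlunsplit(parts._replace(netloc=netloc)), header

    url, header = extract_basic_auth('https://user:secret@example.com/path')
    assert url == 'https://example.com/path'
    assert header == 'Basic dXNlcjpzZWNyZXQ='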
@@ -4115,14 +4265,14 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, urllib.error.HTTPError) and err.code == 404:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
@@ -58,11 +58,11 @@
    read_stdin,
    render_table,
    setproctitle,
    std_headers,
    traverse_obj,
    variadic,
    write_string,
)
from .utils.networking import std_headers
from .YoutubeDL import YoutubeDL

_IN_CLI = False
@@ -18,7 +18,8 @@ def pycryptodome_module():


def get_hidden_imports():
    yield 'yt_dlp.compat._legacy'
    yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
    yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
    yield pycryptodome_module()
    yield from collect_submodules('websockets')
    # These are auto-detected, but explicitly add them just in case
@@ -1,14 +1,11 @@
import os
import sys
import warnings
import xml.etree.ElementTree as etree

from ._deprecated import *  # noqa: F401, F403
from .compat_utils import passthrough_module

# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
passthrough_module(__name__, '._deprecated')
del passthrough_module


# HTMLParseError has been deprecated in Python 3.3 and removed in
@@ -70,3 +67,13 @@ def compat_expanduser(path):
        return userhome + path[i:]
else:
    compat_expanduser = os.path.expanduser


def urllib_req_to_req(urllib_request):
    """Convert urllib Request to a networking Request"""
    from ..networking import Request
    from ..utils.networking import HTTPHeaderDict
    return Request(
        urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
        headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
        extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
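A short usage sketch for the bridge helper above (assuming a yt_dlp checkout on the import path):

    import urllib.request

    from yt_dlp.compat import urllib_req_to_req

    old_req = urllib.request.Request(
        'https://example.com/api', data=b'{}', method='POST',
        headers={'Content-Type': 'application/json'})

    req = urllib_req_to_req(old_req)  # yt_dlp.networking.Request
    assert req.method == 'POST'
    assert req.headers['Content-Type'] == 'application/json'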
@@ -1,4 +1,12 @@
"""Deprecated - New code should avoid these"""
import warnings

from .compat_utils import passthrough_module

# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module

import base64
import urllib.error
@@ -8,7 +16,6 @@

compat_b64decode = base64.b64decode

compat_HTTPError = urllib.error.HTTPError
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
@@ -16,12 +16,12 @@
import shutil
import socket
import struct
import subprocess
import tokenize
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as etree
from subprocess import DEVNULL

# isort: split
import asyncio  # noqa: F401
@@ -70,6 +70,7 @@ def compat_setenv(key, value, env=os.environ):
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
@@ -84,7 +85,7 @@ def compat_setenv(key, value, env=os.environ):
compat_Struct = struct.Struct
compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: DEVNULL
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError
13 yt_dlp/compat/types.py (new file)
@@ -0,0 +1,13 @@
# flake8: noqa: F405
from types import *  # noqa: F403

from .compat_utils import passthrough_module

passthrough_module(__name__, 'types')
del passthrough_module

try:
    # NB: pypy has builtin NoneType, so checking NameError won't work
    from types import NoneType  # >= 3.10
except ImportError:
    NoneType = type(None)
@@ -1,6 +1,9 @@
# flake8: noqa: F405
from urllib import *  # noqa: F403

del request
from . import request  # noqa: F401

from ..compat_utils import passthrough_module

passthrough_module(__name__, 'urllib')
@@ -41,30 +41,15 @@
    try_call,
    write_string,
)
from .utils._utils import _YDLLogger

CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}


class YDLLogger:
    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if self._ydl:
            self._ydl.write_debug(message)

    def info(self, message):
        if self._ydl:
            self._ydl.to_screen(f'[Cookies] {message}')

    def warning(self, message, only_once=False):
        if self._ydl:
            self._ydl.report_warning(message, only_once)

    def error(self, message):
        if self._ydl:
            self._ydl.report_error(message)
class YDLLogger(_YDLLogger):
    def warning(self, message, only_once=False):  # compat
        return super().warning(message, once=only_once)


class ProgressBar(MultilinePrinter):
    _DELAY, _timer = 0.1, 0
@@ -112,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):

    jar = YoutubeDLCookieJar(cookie_file)
    if not is_filename or os.access(cookie_file, os.R_OK):
        jar.load(ignore_discard=True, ignore_expires=True)
        jar.load()
    cookie_jars.append(jar)

    return _merge_cookie_jars(cookie_jars)
@@ -1228,7 +1213,7 @@ def open(self, file, *, write=False):
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
    def _really_save(self, f, ignore_discard, ignore_expires):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
@@ -1249,7 +1234,7 @@ def _really_save(self, f, ignore_discard=False, ignore_expires=False):
                name, value
            )))

    def save(self, filename=None, *args, **kwargs):
    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
@@ -1268,9 +1253,9 @@ def save(self, filename=None, *args, **kwargs):

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, *args, **kwargs)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
@@ -1327,6 +1312,13 @@ def get_cookie_header(self, url):
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(escape_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)
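Rough usage sketch of the two jar helpers side by side (an empty jar simply yields None and an empty list):

    from yt_dlp.cookies import YoutubeDLCookieJar

    jar = YoutubeDLCookieJar()
    # ... normally populated via jar.load() or --cookies-from-browser ...

    url = 'https://example.com/video'
    header = jar.get_cookie_header(url)     # 'name=value; other=value2' or None
    cookies = jar.get_cookies_for_url(url)  # [http.cookiejar.Cookie, ...]
    for cookie in cookies:
        print(cookie.name, cookie.value, cookie.domain, cookie.path)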
@@ -255,6 +255,7 @@ def sanitize_open(self, filename, open_mode):

    @wrap_file_access('remove')
    def try_remove(self, filename):
        if os.path.isfile(filename):
            os.remove(filename)

    @wrap_file_access('rename')
@@ -418,7 +419,6 @@ def download(self, filename, info_dict, subtitle=False):
        """Download to a filename using the info from info_dict
        Return True on success and False otherwise
        """

        nooverwrites_and_exists = (
            not self.params.get('overwrites', True)
            and os.path.exists(encodeFilename(filename))
@@ -1,14 +1,16 @@
import enum
import json
import os.path
import os
import re
import subprocess
import sys
import tempfile
import time
import uuid

from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
    Popen,
@@ -24,7 +26,6 @@
    encodeFilename,
    find_available_port,
    remove_end,
    sanitized_Request,
    traverse_obj,
)
@@ -42,6 +43,7 @@ class ExternalFD(FragmentFD):
    def real_download(self, filename, info_dict):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        self._cookies_tempfile = None

        try:
            started = time.time()
@@ -54,6 +56,9 @@ def real_download(self, filename, info_dict):
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())
        finally:
            if self._cookies_tempfile:
                self.try_remove(self._cookies_tempfile)

        if retval == 0:
            status = {
@@ -125,6 +130,16 @@ def _configuration_args(self, keys=None, *args, **kwargs):
            self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
            keys, *args, **kwargs)

    def _write_cookies(self):
        if not self.ydl.cookiejar.filename:
            tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
            tmp_cookies.close()
            self._cookies_tempfile = tmp_cookies.name
            self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
        # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
        self.ydl.cookiejar.save(self._cookies_tempfile)
        return self.ydl.cookiejar.filename or self._cookies_tempfile

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
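The intent of `_write_cookies` above, reduced to a standalone sketch (the helper name and the bare-jar handling are illustrative; the real method also reports the temp path and defers cleanup to `real_download`):

    import tempfile

    def write_cookies(cookiejar):
        # Reuse the jar's own file when it is file-backed; otherwise
        # persist to a throwaway .cookies file the caller must remove.
        tempname = None
        if not cookiejar.filename:
            with tempfile.NamedTemporaryFile(suffix='.cookies', delete=False) as f:
                tempname = f.name
        cookiejar.save(tempname)  # save(None) writes to cookiejar.filename
        return cookiejar.filename or tempname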
@@ -184,6 +199,9 @@ class CurlFD(ExternalFD):

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
        if cookie_header:
            cmd += ['--cookie', cookie_header]
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -214,6 +232,9 @@ def _make_cmd(self, tmpfilename, info_dict):
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['-H', f'{key}: {val}']
        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
        if cookie_header:
            cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
        cmd += self._configuration_args()
        cmd += ['--', info_dict['url']]
        return cmd
@@ -223,7 +244,9 @@ class WgetFD(ExternalFD):
    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
            cmd += ['--load-cookies', self._write_cookies()]
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -271,7 +294,7 @@ def _call_downloader(self, tmpfilename, info_dict):
            return super()._call_downloader(tmpfilename, info_dict)

    def _make_cmd(self, tmpfilename, info_dict):
        cmd = [self.exe, '-c',
        cmd = [self.exe, '-c', '--no-conf',
               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
        if 'fragments' in info_dict:
@@ -279,6 +302,8 @@ def _make_cmd(self, tmpfilename, info_dict):
        else:
            cmd += ['--min-split-size', '1M']

        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
            cmd += [f'--load-cookies={self._write_cookies()}']
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += ['--header', f'{key}: {val}']
@@ -333,13 +358,12 @@ def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
            'method': method,
            'params': [f'token:{rpc_secret}', *params],
        }).encode('utf-8')
        request = sanitized_Request(
        request = Request(
            f'http://localhost:{rpc_port}/jsonrpc',
            data=d, headers={
                'Content-Type': 'application/json',
                'Content-Length': f'{len(d)}',
                'Ytdl-request-proxy': '__noproxy__',
            })
            }, proxies={'all': None})
        with self.ydl.urlopen(request) as r:
            resp = json.load(r)
        assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
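For reference, the JSON-RPC body produced above looks roughly like this; the method name, token and GID are made up, and the 'jsonrpc'/'id' members are assumed from the aria2 RPC convention rather than visible in this hunk:

    import json

    payload = json.dumps({
        'jsonrpc': '2.0',
        'id': 'sanitycheck',
        'method': 'aria2.tellStatus',
        'params': ['token:$ecret', '0000000000000001'],  # secret first, then the GID
    }).encode('utf-8')
    # POSTed to http://localhost:{rpc_port}/jsonrpc with Content-Type: application/json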
@@ -417,6 +441,14 @@ def _make_cmd(self, tmpfilename, info_dict):
        if info_dict.get('http_headers') is not None:
            for key, val in info_dict['http_headers'].items():
                cmd += [f'{key}:{val}']

        # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
        # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
        # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
        # 2: https://httpie.io/docs/cli/sessions
        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
        if cookie_header:
            cmd += [f'Cookie:{cookie_header}']
        return cmd
@@ -527,7 +559,13 @@ def _call_downloader(self, tmpfilename, info_dict):

        selected_formats = info_dict.get('requested_formats') or [info_dict]
        for i, fmt in enumerate(selected_formats):
            if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
            is_http = re.match(r'^https?://', fmt['url'])
            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
            if cookies:
                args.extend(['-cookies', ''.join(
                    f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
                    for cookie in cookies)])
            if fmt.get('http_headers') and is_http:
                # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
                # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
                args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
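For reference, the string handed to ffmpeg's -cookies option is one 'name=value; path=...; domain=...;' entry per cookie, CRLF-terminated. A tiny sketch with a stand-in cookie object:

    class FakeCookie:
        name, value, path, domain = 'session', 'abc123', '/', '.example.com'

    cookies = [FakeCookie()]
    arg = ''.join(
        f'{c.name}={c.value}; path={c.path}; domain={c.domain};\r\n'
        for c in cookies)
    # -> "session=abc123; path=/; domain=.example.com;\r\n"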
@@ -3,11 +3,11 @@
import itertools
import struct
import time
import urllib.error
import urllib.parse

from .fragment import FragmentFD
from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import fix_xml_ampersands, xpath_text
@@ -312,7 +312,7 @@ def real_download(self, filename, info_dict):
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        man_url = urlh.url
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/ytdl-org/youtube-dl/issues/7823)
@@ -407,8 +407,8 @@ def real_download(self, filename, info_dict):
                        if box_type == b'mdat':
                            self._append_fragment(ctx, box_data)
                            break
                except urllib.error.HTTPError as err:
                    if live and (err.code == 404 or err.code == 410):
                except HTTPError as err:
                    if live and (err.status == 404 or err.status == 410):
                        # We didn't keep up with the live window. Continue
                        # with the next available fragment.
                        msg = 'Fragment %d unavailable' % frag_i
@@ -1,24 +1,19 @@
import concurrent.futures
import contextlib
import http.client
import json
import math
import os
import struct
import time
import urllib.error

from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..utils import (
    DownloadError,
    RetryManager,
    encodeFilename,
    sanitized_Request,
    traverse_obj,
)
from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
from ..utils.networking import HTTPHeaderDict


class HttpQuietDownloader(HttpFD):
@@ -75,7 +70,7 @@ def report_skip_fragment(self, frag_index, err=None):

    def _prepare_url(self, info_dict, url):
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url
        return Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx, info_dict):
        self._prepare_frag_download(ctx)
@@ -300,9 +295,7 @@ def frag_progress_hook(s):
    def _finish_frag_download(self, ctx, info_dict):
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                self.try_remove(ytdl_filename)
            self.try_remove(self.ytdl_filename(ctx['filename']))
        elapsed = time.time() - ctx['started']

        to_file = ctx['tmpfilename'] != '-'
@@ -459,7 +452,7 @@ def download_fragment(fragment, ctx):

            frag_index = ctx['fragment_index'] = fragment['frag_index']
            ctx['last_error'] = None
            headers = info_dict.get('http_headers', {}).copy()
            headers = HTTPHeaderDict(info_dict.get('http_headers'))
            byte_range = fragment.get('byte_range')
            if byte_range:
                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
@@ -479,7 +472,7 @@ def error_callback(err, count, retries):
                    if not self._download_fragment(
                            ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
                        return
                except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
                except (HTTPError, IncompleteRead) as err:
                    retry.error = err
                    continue
                except DownloadError:  # has own retry settings
@@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict, allow_unplayable_formats=False):
    def _has_drm(manifest):  # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
        return bool(re.search('|'.join((
            r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"',  # Apple FairPlay
            r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"',  # Microsoft PlayReady
            r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
        )), manifest))

    @classmethod
    def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
        UNSUPPORTED_FEATURES = [
            # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
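A quick self-check of the new `_has_drm` detector against made-up manifests:

    from yt_dlp.downloader.hls import HlsFD

    manifest = '\n'.join((
        '#EXTM3U',
        '#EXT-X-KEY:METHOD=SAMPLE-AES,URI="skd://key-id",KEYFORMAT="com.apple.streamingkeydelivery"',
        '#EXTINF:6.0,',
        'segment0.ts',
    ))
    assert HlsFD._has_drm(manifest)  # FairPlay skd:// URI matches
    # Plain AES-128 encryption is not treated as DRM by this detector
    assert not HlsFD._has_drm('#EXTM3U\n#EXT-X-KEY:METHOD=AES-128,URI="key.bin"')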
@@ -50,13 +59,15 @@ def can_download(manifest, info_dict, allow_unplayable_formats=False):
        ]
        if not allow_unplayable_formats:
            UNSUPPORTED_FEATURES += [
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
                r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1], but not necessarily DRM
            ]

        def check_results():
            yield not info_dict.get('is_live')
            for feature in UNSUPPORTED_FEATURES:
                yield not re.search(feature, manifest)
            if not allow_unplayable_formats:
                yield not cls._has_drm(manifest)
        return all(check_results())

    def real_download(self, filename, info_dict):
@@ -64,7 +75,7 @@ def real_download(self, filename, info_dict):
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        man_url = urlh.url
        s = urlh.read().decode('utf-8', 'ignore')

        can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
@@ -81,14 +92,13 @@ def real_download(self, filename, info_dict):
            message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
                       f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
        if not can_download:
            has_drm = re.search('|'.join([
                r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
                r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
            ]), s)
            if has_drm and not self.params.get('allow_unplayable_formats'):
            if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
                if info_dict.get('has_drm') and self.params.get('test'):
                    self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
                else:
                    self.report_error(
                        'This video is DRM protected; Try selecting another format with --format or '
                        'add --check-formats to automatically fallback to the next best format')
                        'This format is DRM protected; Try selecting another format with --format or '
                        'add --check-formats to automatically fallback to the next best format', tb=False)
                return False
            message = message or 'Unsupported features have been detected'
            fd = FFmpegFD(self.ydl, self.params)
@@ -1,12 +1,14 @@
import http.client
import os
import random
import socket
import ssl
import time
import urllib.error

from .common import FileDownloader
from ..networking import Request
from ..networking.exceptions import (
    CertificateVerifyError,
    HTTPError,
    TransportError,
)
from ..utils import (
    ContentTooShortError,
    RetryManager,
@@ -16,18 +18,10 @@
    encodeFilename,
    int_or_none,
    parse_http_range,
    sanitized_Request,
    try_call,
    write_xattr,
)

RESPONSE_READ_EXCEPTIONS = (
    TimeoutError,
    socket.timeout,  # compat: py < 3.10
    ConnectionError,
    ssl.SSLError,
    http.client.HTTPException
)
from ..utils.networking import HTTPHeaderDict


class HttpFD(FileDownloader):
@@ -46,10 +40,7 @@ class DownloadContext(dict):
        ctx.stream = None

        # Disable compression
        headers = {'Accept-Encoding': 'identity'}
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
        headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))

        is_test = self.params.get('test', False)
        chunk_size = self._TEST_FILE_SIZE if is_test else (
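HTTPHeaderDict (from yt_dlp.utils.networking) folds the old copy/update dance into a single constructor call. As used above it behaves like a case-insensitive dict that merges its arguments left to right and skips None arguments. A small sketch under that assumption:

    from yt_dlp.utils.networking import HTTPHeaderDict

    headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, None,
                             {'accept-encoding': 'gzip', 'Range': 'bytes=0-'})
    assert headers['Accept-Encoding'] == 'gzip'  # later mappings win, lookups ignore case
    assert 'range' in headers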
@@ -120,10 +111,10 @@ def establish_connection():
                    if try_call(lambda: range_end >= ctx.content_len):
                        range_end = ctx.content_len - 1

                request = sanitized_Request(url, request_data, headers)
                request = Request(url, request_data, headers)
                has_range = range_start is not None
                if has_range:
                    request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
                    request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
                # Establish connection
                try:
                    ctx.data = self.ydl.urlopen(request)
@@ -154,17 +145,17 @@ def establish_connection():
                        self.report_unable_to_resume()
                        ctx.resume_len = 0
                        ctx.open_mode = 'wb'
                    ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
                except urllib.error.HTTPError as err:
                    if err.code == 416:
                    ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
                except HTTPError as err:
                    if err.status == 416:
                        # Unable to resume (requested range not satisfiable)
                        try:
                            # Open the connection again without the range header
                            ctx.data = self.ydl.urlopen(
                                sanitized_Request(url, request_data, headers))
                            content_length = ctx.data.info()['Content-Length']
                        except urllib.error.HTTPError as err:
                            if err.code < 500 or err.code >= 600:
                                Request(url, request_data, headers))
                            content_length = ctx.data.headers['Content-Length']
                        except HTTPError as err:
                            if err.status < 500 or err.status >= 600:
                                raise
                        else:
                            # Examine the reported length
@@ -192,17 +183,13 @@ def establish_connection():
                        ctx.resume_len = 0
                        ctx.open_mode = 'wb'
                        return
                    elif err.code < 500 or err.code >= 600:
                    elif err.status < 500 or err.status >= 600:
                        # Unexpected HTTP error
                        raise
                    raise RetryDownload(err)
                except urllib.error.URLError as err:
                    if isinstance(err.reason, ssl.CertificateError):
                except CertificateVerifyError:
                    raise
                    raise RetryDownload(err)
                # In urllib.request.AbstractHTTPHandler, the response is partially read on request.
                # Any errors that occur during this will not be wrapped by URLError
                except RESPONSE_READ_EXCEPTIONS as err:
                except TransportError as err:
                    raise RetryDownload(err)

        def close_stream():
@@ -212,9 +199,9 @@ def close_stream():
                ctx.stream = None

        def download():
            data_len = ctx.data.info().get('Content-length')
            data_len = ctx.data.headers.get('Content-length')

            if ctx.data.info().get('Content-encoding'):
            if ctx.data.headers.get('Content-encoding'):
                # Content-encoding is present, Content-length is not reliable anymore as we are
                # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
                data_len = None
@@ -258,7 +245,7 @@ def retry(e):
            try:
                # Download and write
                data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
            except RESPONSE_READ_EXCEPTIONS as err:
            except TransportError as err:
                retry(err)

            byte_counter += len(data_block)
@@ -339,15 +326,15 @@ def retry(e):
            elif speed:
                ctx.throttle_start = None

            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
                ctx.resume_len = byte_counter
                # ctx.block_size = block_size
                raise NextFragment()

            if ctx.stream is None:
                self.to_stderr('\n')
                self.report_error('Did not get any data blocks')
                return False

            if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
                ctx.resume_len = byte_counter
                raise NextFragment()

            if ctx.tmpfilename != '-':
                ctx.stream.close()
@@ -359,7 +346,7 @@ def retry(e):

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
            info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
@@ -2,9 +2,9 @@
import io
import struct
import time
import urllib.error

from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import RetryManager

u8 = struct.Struct('>B')
@@ -271,7 +271,7 @@ def real_download(self, filename, info_dict):
                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                        extra_state['ism_track_written'] = True
                    self._append_fragment(ctx, frag_content)
                except urllib.error.HTTPError as err:
                except HTTPError as err:
                    retry.error = err
                    continue
@@ -5,13 +5,8 @@
from . import get_suitable_downloader
from .common import FileDownloader
from .external import FFmpegFD
from ..utils import (
    DownloadError,
    WebSocketsWrapper,
    sanitized_Request,
    str_or_none,
    try_get,
)
from ..networking import Request
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get


class NiconicoDmcFD(FileDownloader):
@@ -33,7 +28,7 @@ def real_download(self, filename, info_dict):
        heartbeat_data = heartbeat_info_dict['data'].encode()
        heartbeat_interval = heartbeat_info_dict.get('interval', 30)

        request = sanitized_Request(heartbeat_url, heartbeat_data)
        request = Request(heartbeat_url, heartbeat_data)

        def heartbeat():
            try:
@@ -1,8 +1,8 @@
import json
import time
import urllib.error

from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import (
    RegexNotFoundError,
    RetryManager,
@@ -10,6 +10,7 @@
    int_or_none,
    try_get,
)
from ..utils.networking import HTTPHeaderDict


class YoutubeLiveChatFD(FragmentFD):
@@ -37,10 +38,7 @@ def real_download(self, filename, info_dict):
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            http_headers = info_dict.get('http_headers', {})
            if headers:
                http_headers = http_headers.copy()
                http_headers.update(headers)
            http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
@@ -129,7 +127,7 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
                        or frag_index == 1 and try_refresh_replay_beginning
                        or parse_actions_replay)
                    return (True, *func(live_chat_continuation))
                except urllib.error.HTTPError as err:
                except HTTPError as err:
                    retry.error = err
                    continue
            return False, None, None, None
@@ -15,7 +15,6 @@
    YoutubeSearchURLIE,
    YoutubeMusicSearchURLIE,
    YoutubeSubscriptionsIE,
    YoutubeStoriesIE,
    YoutubeTruncatedIDIE,
    YoutubeTruncatedURLIE,
    YoutubeYtBeIE,
@@ -215,6 +214,7 @@
from .bilibili import (
    BiliBiliIE,
    BiliBiliBangumiIE,
    BiliBiliBangumiSeasonIE,
    BiliBiliBangumiMediaIE,
    BiliBiliSearchIE,
    BilibiliCategoryIE,
@@ -1012,6 +1012,7 @@
    LyndaCourseIE
)
from .m6 import M6IE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
    MailRuIE,
@@ -1141,6 +1142,7 @@
)
from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE
from .museai import MuseAIIE
from .musescore import MuseScoreIE
from .musicdex import (
    MusicdexSongIE,
@@ -1531,6 +1533,7 @@
)
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE
from .qqmusic import (
    QQMusicIE,
@@ -1854,6 +1857,10 @@
    SRGSSRPlayIE,
)
from .srmediathek import SRMediathekIE
from .stacommu import (
    StacommuLiveIE,
    StacommuVODIE,
)
from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE
from .steam import (
@@ -1866,7 +1873,6 @@
    StoryFireSeriesIE,
)
from .streamable import StreamableIE
from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
@@ -2267,6 +2273,8 @@
    VKIE,
    VKUserVideosIE,
    VKWallPostIE,
    VKPlayIE,
    VKPlayLiveIE,
)
from .vocaroo import VocarooIE
from .vodlocker import VodlockerIE
@@ -12,6 +12,7 @@
    int_or_none,
    parse_iso8601,
    str_or_none,
    traverse_obj,
    try_get,
    unescapeHTML,
    update_url_query,
@@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
            'uploader': 'Behind the News',
            'uploader_id': 'behindthenews',
        }
    }, {
        'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
        'info_dict': {
            'id': '102520540',
            'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
            'ext': 'mp4',
            'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
            'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
        }
    }]

    def _real_extract(self, url):
@@ -107,7 +117,7 @@ def _real_extract(self, url):
            video = True

        if mobj is None:
            mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
            mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
            if mobj is None:
                mobj = re.search(
                    r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
@@ -121,7 +131,8 @@ def _real_extract(self, url):
        urls_info = self._parse_json(
            mobj.group('json_data'), video_id, transform_source=js_to_json)
        youtube = mobj.group('type') == 'YouTube'
        video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
        video = mobj.group('type') == 'Video' or traverse_obj(
            urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'

        if not isinstance(urls_info, list):
            urls_info = [urls_info]
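`traverse_obj` with a tuple of keys tries each alternative in order, and `get_all=False` returns the first hit instead of a list. A sketch with a made-up payload:

    from yt_dlp.utils import traverse_obj

    urls_info = [{'MIMEType': 'video/mp4', 'url': 'https://example.com/clip.mp4'}]
    mime = traverse_obj(urls_info, (0, ('contentType', 'MIMEType')), get_all=False)
    assert mime == 'video/mp4'  # no KeyError even though 'contentType' is absent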
@@ -22,80 +22,23 @@
    int_or_none,
    intlist_to_bytes,
    OnDemandPagedList,
    request_to_url,
    time_seconds,
    traverse_obj,
    update_url_query,
)

# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)


def add_opener(ydl, handler):
    ''' Add a handler for opening URLs, like _download_webpage '''
def add_opener(ydl, handler):  # FIXME: Create proper API in .networking
    """Add a handler for opening URLs, like _download_webpage"""
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    ydl._opener.add_handler(handler)


def remove_opener(ydl, handler):
    '''
    Remove handler(s) for opening URLs
    @param handler Either handler object itself or handler type.
    Specifying handler type will remove all handler which isinstance returns True.
    '''
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
    # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
    opener = ydl._opener
    assert isinstance(ydl._opener, urllib.request.OpenerDirector)
    if isinstance(handler, (type, tuple)):
        find_cp = lambda x: isinstance(x, handler)
    else:
        find_cp = lambda x: x is handler

    removed = []
    for meth in dir(handler):
        if meth in ["redirect_request", "do_open", "proxy_open"]:
            # oops, coincidental match
            continue

        i = meth.find("_")
        protocol = meth[:i]
        condition = meth[i + 1:]

        if condition.startswith("error"):
            j = condition.find("_") + i + 1
            kind = meth[j + 1:]
            try:
                kind = int(kind)
            except ValueError:
                pass
            lookup = opener.handle_error.get(protocol, {})
            opener.handle_error[protocol] = lookup
        elif condition == "open":
            kind = protocol
            lookup = opener.handle_open
        elif condition == "response":
            kind = protocol
            lookup = opener.process_response
        elif condition == "request":
            kind = protocol
            lookup = opener.process_request
        else:
            continue

        handlers = lookup.setdefault(kind, [])
        if handlers:
            handlers[:] = [x for x in handlers if not find_cp(x)]

        removed.append(x for x in handlers if find_cp(x))

    if removed:
        for x in opener.handlers:
            if find_cp(x):
                x.add_parent(None)
        opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
    rh = ydl._request_director.handlers['Urllib']
    if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
        return
    opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
    assert isinstance(opener, urllib.request.OpenerDirector)
    opener.add_handler(handler)
    rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')


class AbemaLicenseHandler(urllib.request.BaseHandler):
@@ -137,11 +80,11 @@ def _get_videokey_from_ticket(self, ticket):
        return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))

    def abematv_license_open(self, url):
        url = request_to_url(url)
        url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
        ticket = urllib.parse.urlparse(url).netloc
        response_data = self._get_videokey_from_ticket(ticket)
        return urllib.response.addinfourl(io.BytesIO(response_data), headers={
            'Content-Length': len(response_data),
            'Content-Length': str(len(response_data)),
        }, url=url, code=200)
@@ -213,10 +156,7 @@ def _get_device_token(self):
            })
        AbemaTVBaseIE._USERTOKEN = user_data['token']

        # don't allow adding it 2 times or more, though it's guarded
        remove_opener(self._downloader, AbemaLicenseHandler)
        add_opener(self._downloader, AbemaLicenseHandler(self))

        return self._USERTOKEN

    def _get_media_token(self, invalidate=False, to_show=True):
@@ -6,10 +6,8 @@

from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
    compat_HTTPError,
    compat_b64decode,
)
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
    ass_subtitles_timecode,
    bytes_to_intlist,
@@ -142,9 +140,9 @@ def _perform_login(self, username, password):
            self._HEADERS = {'authorization': 'Bearer ' + access_token}
        except ExtractorError as e:
            message = None
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                resp = self._parse_json(
                    e.cause.read().decode(), None, fatal=False) or {}
                    e.cause.response.read().decode(), None, fatal=False) or {}
                message = resp.get('message') or resp.get('code')
            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
@@ -195,14 +193,14 @@ def _real_extract(self, url):
                })
                break
            except ExtractorError as e:
                if not isinstance(e.cause, compat_HTTPError):
                if not isinstance(e.cause, HTTPError):
                    raise e

                if e.cause.code == 401:
                if e.cause.status == 401:
                    # This usually goes away with a different random pkcs1pad, so retry
                    continue

                error = self._parse_json(e.cause.read(), video_id)
                error = self._parse_json(e.cause.response.read(), video_id)
                message = error.get('message')
                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                    self.raise_geo_restricted(msg=message)
@@ -2,11 +2,11 @@
import json
import re
import time
import urllib.error
import xml.etree.ElementTree as etree

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
    NO_DEFAULT,
    ExtractorError,
@@ -1394,7 +1394,7 @@ def post_form(form_page_res, note, data={}):
            form_page, urlh = form_page_res
            post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
            if not re.match(r'https?://', post_url):
                post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
                post_url = compat_urlparse.urljoin(urlh.url, post_url)
            form_data = self._hidden_inputs(form_page)
            form_data.update(data)
            return self._download_webpage_handle(
@@ -1473,7 +1473,7 @@ def extract_redirect_url(html, url=None, fatal=False):
                elif 'automatically signed in with' in provider_redirect_page:
                    # Seems like comcast is rolling up new way of automatically signing customers
                    oauth_redirect_url = self._html_search_regex(
                        r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
                        r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
                        'oauth redirect (signed)')
                    # Just need to process the request. No useful data comes back
                    self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
@ -1619,7 +1619,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
hidden_data['history'] = 1
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending first bookend',
|
||||
urlh.url, video_id, 'Sending first bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
|
@ -1629,7 +1629,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
|
@ -1638,7 +1638,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
hidden_data['history'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
urlh.url, video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
@ -1652,7 +1652,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
hidden_data['history_val'] = 1
|
||||
|
||||
provider_login_redirect_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending First Bookend',
|
||||
urlh.url, video_id, 'Sending First Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_redirect_page, urlh = provider_login_redirect_page_res
|
||||
|
@ -1680,7 +1680,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
|
@ -1690,7 +1690,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
hidden_data['history_val'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending Final Bookend',
|
||||
urlh.url, video_id, 'Sending Final Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
@ -1699,7 +1699,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
# based redirect that should be followed.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, url=urlh.geturl())
|
||||
provider_redirect_page, url=urlh.url)
|
||||
if provider_refresh_redirect_url:
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
|
@ -1724,7 +1724,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
|||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
|
||||
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
raise_mvpd_required()
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
|
|
|
@@ -170,8 +170,10 @@ def _real_extract(self, url):
                 continue
             ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
             if ext == 'm3u8':
-                info['formats'].extend(self._extract_m3u8_formats(
-                    asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                info['formats'].extend(fmts)
+                self._merge_subtitles(subs, target=info['subtitles'])
             elif ext == 'f4m':
                 continue
                 # info['formats'].extend(self._extract_f4m_formats(
@@ -1,8 +1,8 @@
 import urllib.parse

 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
-    HEADRequest,
     ExtractorError,
     determine_ext,
     scale_thumbnails_to_max_format_width,

@@ -121,7 +121,7 @@ def _real_extract(self, url):
         canonical_url = self._request_webpage(
             HEADRequest(url), video_id,
             note='Resolve canonical player URL',
-            errnote='Could not resolve canonical player URL').geturl()
+            errnote='Could not resolve canonical player URL').url
         _, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
         cid = urllib.parse.parse_qs(query)['cid'][0]
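Note: the recurring `.geturl()` → `.url` edits (and the `.getcode()` → `.status` ones further down) exist because response handles are now yt-dlp's own Response objects rather than urllib ones. A hedged standalone sketch, assuming yt-dlp 2023.07.06+:

    import yt_dlp

    with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
        # urlopen() accepts a URL string or a yt_dlp.networking.Request
        resp = ydl.urlopen('https://example.com/')
        print(resp.url)     # final URL after redirects; replaces resp.geturl()
        print(resp.status)  # HTTP status code; replaces resp.getcode()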
@@ -1,16 +1,16 @@
 import json
 import re
-import urllib.error
 import urllib.parse

 from .common import InfoExtractor
 from .naver import NaverBaseIE
 from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
-from ..compat import compat_HTTPError, compat_urllib_parse_unquote
+from ..compat import compat_urllib_parse_unquote
+from ..networking import HEADRequest
+from ..networking.exceptions import HTTPError
 from ..utils import (
     KNOWN_EXTENSIONS,
     ExtractorError,
-    HEADRequest,
     bug_reports_message,
     clean_html,
     dict_get,

@@ -899,7 +899,7 @@ def _real_extract(self, url):
                 video_id, note='Fetching archived video file url', expected_status=True)
         except ExtractorError as e:
             # HTTP Error 404 is expected if the video is not saved.
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                 self.raise_no_formats(
                     'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
             else:

@@ -926,7 +926,7 @@ def _real_extract(self, url):
         info['thumbnails'] = self._extract_thumbnails(video_id)

         if urlh:
-            url = compat_urllib_parse_unquote(urlh.geturl())
+            url = compat_urllib_parse_unquote(urlh.url)
             video_file_url_qs = parse_qs(url)
             # Attempt to recover any ext & format info from playback url & response headers
             format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}

@@ -1052,7 +1052,7 @@ def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
         try:
             return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
         except ExtractorError as e:
-            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                 raise ExtractorError('Page was not archived', expected=True)
             retry.error = e
             continue
@@ -1,5 +1,5 @@
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,

@@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
     _API_BASE = 'https://api.atresplayer.com/'

     def _handle_error(self, e, code):
-        if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
-            error = self._parse_json(e.cause.read(), None)
+        if isinstance(e.cause, HTTPError) and e.cause.status == code:
+            error = self._parse_json(e.cause.response.read(), None)
             if error.get('error') == 'required_registered':
                 self.raise_login_required()
             raise ExtractorError(error['error_description'], expected=True)
@@ -2,11 +2,11 @@
 import itertools
 import json
 import re
-import urllib.error
 import xml.etree.ElementTree

 from .common import InfoExtractor
-from ..compat import compat_HTTPError, compat_str, compat_urlparse
+from ..compat import compat_str, compat_urlparse
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,

@@ -277,7 +277,7 @@ def _perform_login(self, username, password):
             post_url, None, 'Logging in', data=urlencode_postdata(login_form),
             headers={'Referer': self._LOGIN_URL})

-        if self._LOGIN_URL in urlh.geturl():
+        if self._LOGIN_URL in urlh.url:
             error = clean_html(get_element_by_class('form-message', response))
             if error:
                 raise ExtractorError(

@@ -388,8 +388,8 @@ def _process_media_selector(self, media_selection, programme_id):
                             href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                             m3u8_id=format_id, fatal=False)
                     except ExtractorError as e:
-                        if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
-                                and e.exc_info[1].code in (403, 404)):
+                        if not (isinstance(e.exc_info[1], HTTPError)
+                                and e.exc_info[1].status in (403, 404)):
                             raise
                         fmts = []
                     formats.extend(fmts)

@@ -472,7 +472,7 @@ def _download_playlist(self, playlist_id):

             return programme_id, title, description, duration, formats, subtitles
         except ExtractorError as ee:
-            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+            if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
                 raise

         # fallback to legacy playlist

@@ -983,7 +983,7 @@ def _real_extract(self, url):
                 # Some playlist URL may fail with 500, at the same time
                 # the other one may work fine (e.g.
                 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 500:
                     continue
                 raise
             if entry:
@@ -4,11 +4,11 @@
 import itertools
 import math
 import time
-import urllib.error
 import urllib.parse

 from .common import InfoExtractor, SearchInfoExtractor
 from ..dependencies import Cryptodome
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     GeoRestrictedError,

@@ -18,6 +18,7 @@
     float_or_none,
     format_field,
     int_or_none,
+    join_nonempty,
     make_archive_id,
     merge_dicts,
     mimetype2ext,

@@ -135,6 +136,17 @@ def _get_all_children(self, reply):
         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
             yield from children

+    def _get_episodes_from_season(self, ss_id, url):
+        season_info = self._download_json(
+            'https://api.bilibili.com/pgc/web/season/section', ss_id,
+            note='Downloading season info', query={'season_id': ss_id},
+            headers={'Referer': url, **self.geo_verification_headers()})
+
+        for entry in traverse_obj(season_info, (
+                'result', 'main_section', 'episodes',
+                lambda _, v: url_or_none(v['share_url']) and v['id'])):
+            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
+

 class BiliBiliIE(BilibiliBaseIE):
     _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

@@ -403,76 +415,93 @@ def _real_extract(self, url):


 class BiliBiliBangumiIE(BilibiliBaseIE):
-    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
+    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'

     _TESTS = [{
-        'url': 'https://www.bilibili.com/bangumi/play/ss897',
+        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
         'info_dict': {
-            'id': 'ss897',
+            'id': '267851',
             'ext': 'mp4',
-            'series': '神的记事本',
-            'season': '神的记事本',
-            'season_id': 897,
+            'series': '鬼灭之刃',
+            'series_id': '4358',
+            'season': '鬼灭之刃',
+            'season_id': '26801',
             'season_number': 1,
-            'episode': '你与旅行包',
-            'episode_number': 2,
-            'title': '神的记事本:第2话 你与旅行包',
-            'duration': 1428.487,
-            'timestamp': 1310809380,
-            'upload_date': '20110716',
-            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            'episode': '残酷',
+            'episode_id': '267851',
+            'episode_number': 1,
+            'title': '1 残酷',
+            'duration': 1425.256,
+            'timestamp': 1554566400,
+            'upload_date': '20190406',
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
         },
     }, {
         'url': 'https://www.bilibili.com/bangumi/play/ep508406',
-        'only_matching': True,
+        'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
+        episode_id = video_id[2:]
         webpage = self._download_webpage(url, video_id)

         if '您所在的地区无法观看本片' in webpage:
             raise GeoRestrictedError('This video is restricted')
-        elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
-              or '正在观看预览,大会员免费看全片' in webpage):
+        elif '正在观看预览,大会员免费看全片' in webpage:
             self.raise_login_required('This video is for premium members only')

-        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+        headers = {'Referer': url, **self.geo_verification_headers()}
+        play_info = self._download_json(
+            'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
+            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
+            headers=headers)
+        premium_only = play_info.get('code') == -10403
+        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

         formats = self.extract_formats(play_info)
-        if (not formats and '成为大会员抢先看' in webpage
-                and play_info.get('durl') and not play_info.get('dash')):
+        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
             self.raise_login_required('This video is for premium members only')

-        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
+        bangumi_info = self._download_json(
+            'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
+            query={'ep_id': episode_id}, headers=headers)['result']

-        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
+        episode_number, episode_info = next((
+            (idx, ep) for idx, ep in enumerate(traverse_obj(
+                bangumi_info, ('episodes', ..., {dict})), 1)
+            if str_or_none(ep.get('id')) == episode_id), (1, {}))
+
+        season_id = bangumi_info.get('season_id')
         season_number = season_id and next((
             idx + 1 for idx, e in enumerate(
-                traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
+                traverse_obj(bangumi_info, ('seasons', ...)))
             if e.get('season_id') == season_id
         ), None)

+        aid = episode_info.get('aid')
+
         return {
             'id': video_id,
             'formats': formats,
-            'title': traverse_obj(initial_state, 'h1Title'),
-            'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
-            'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
-            'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
-            'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
-            'season_id': season_id,
+            **traverse_obj(bangumi_info, {
+                'series': ('series', 'series_title', {str}),
+                'series_id': ('series', 'series_id', {str_or_none}),
+                'thumbnail': ('square_cover', {url_or_none}),
+            }),
+            'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
+            'episode': episode_info.get('long_title'),
+            'episode_id': episode_id,
+            'episode_number': int_or_none(episode_info.get('title')) or episode_number,
+            'season_id': str_or_none(season_id),
             'season_number': season_number,
-            'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
-            'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
+            'timestamp': int_or_none(episode_info.get('pub_time')),
             'duration': float_or_none(play_info.get('timelength'), scale=1000),
-            'subtitles': self.extract_subtitles(
-                video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
-            '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
-            'http_headers': {'Referer': url, **self.geo_verification_headers()},
+            'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
+            '__post_extractor': self.extract_comments(aid),
+            'http_headers': headers,
         }


-class BiliBiliBangumiMediaIE(InfoExtractor):
+class BiliBiliBangumiMediaIE(BilibiliBaseIE):
     _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://www.bilibili.com/bangumi/media/md24097891',

@@ -485,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
     def _real_extract(self, url):
         media_id = self._match_id(url)
         webpage = self._download_webpage(url, media_id)
+        ss_id = self._search_json(
+            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']

-        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
-        episode_list = self._download_json(
-            'https://api.bilibili.com/pgc/web/season/section', media_id,
-            query={'season_id': initial_state['mediaInfo']['season_id']},
-            note='Downloading season info')['result']['main_section']['episodes']
+        return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)

-        return self.playlist_result((
-            self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
-            for entry in episode_list), media_id)

+
+class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
+    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
+        'info_dict': {
+            'id': '26801'
+        },
+        'playlist_mincount': 26
+    }]
+
+    def _real_extract(self, url):
+        ss_id = self._match_id(url)
+
+        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)


 class BilibiliSpaceBaseIE(InfoExtractor):

@@ -575,7 +614,7 @@ def fetch_page(page_idx):
                 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
                                                playlist_id, note=f'Downloading page {page_idx}', query=query)
             except ExtractorError as e:
-                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                     raise ExtractorError(
                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                 raise
@@ -2,9 +2,9 @@
 import re

 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
     ExtractorError,
-    HEADRequest,
     OnDemandPagedList,
     clean_html,
     get_element_by_class,


@@ -1,6 +1,6 @@
 from .adobepass import AdobePassIE
+from ..networking import HEADRequest
 from ..utils import (
-    HEADRequest,
     extract_attributes,
     float_or_none,
     get_element_html_by_class,

@@ -155,7 +155,7 @@ def _real_extract(self, url):
         chapters = None

         m3u8_url = self._request_webpage(HEADRequest(
-            update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').geturl()
+            update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
         if 'mpeg_cenc' in m3u8_url:
             self.report_drm(video_id)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')


@@ -7,10 +7,10 @@
 from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
-    compat_HTTPError,
     compat_parse_qs,
     compat_urlparse,
 )
+from ..networking.exceptions import HTTPError
 from ..utils import (
     clean_html,
     dict_get,

@@ -915,8 +915,8 @@ def extract_policy_key():
                 json_data = self._download_json(api_url, video_id, headers=headers)
                 break
             except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
-                    json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+                if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
+                    json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
                     message = json_data.get('message') or json_data['error_code']
                     if json_data.get('error_subcode') == 'CLIENT_GEO':
                         self.raise_geo_restricted(msg=message)


@@ -64,7 +64,7 @@ def _real_extract(self, url):
         # response = self._request_webpage(
         #     HEADRequest(fmt_url), video_id,
         #     'Checking if the video is georestricted')
-        # if '/blocage' in response.geturl():
+        # if '/blocage' in response.url:
         #     raise ExtractorError(
         #         'The video is not available in your country',
         #         expected=True)


@@ -7,9 +7,9 @@
 from .anvato import AnvatoIE
 from .common import InfoExtractor
 from .paramountplus import ParamountPlusIE
+from ..networking import HEADRequest
 from ..utils import (
     ExtractorError,
-    HEADRequest,
     UserNotLive,
     determine_ext,
     float_or_none,


@@ -1,20 +1,20 @@
 import re

 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse_unquote,
-    compat_urllib_parse_urlparse,
-)
+from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
+from ..networking import Request
 from ..utils import (
     ExtractorError,
     float_or_none,
-    sanitized_Request,
     str_or_none,
     traverse_obj,
     urlencode_postdata,
-    USER_AGENTS,
 )

+USER_AGENTS = {
+    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+

 class CeskaTelevizeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'

@@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
         webpage, urlh = self._download_webpage_handle(url, playlist_id)
-        parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
+        parsed_url = compat_urllib_parse_urlparse(urlh.url)
         site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
         playlist_title = self._og_search_title(webpage, default=None)
         if site_name and playlist_title:

@@ -163,16 +163,16 @@ def _real_extract(self, url):
         entries = []

         for user_agent in (None, USER_AGENTS['Safari']):
-            req = sanitized_Request(
+            req = Request(
                 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
                 data=urlencode_postdata(data))

-            req.add_header('Content-type', 'application/x-www-form-urlencoded')
-            req.add_header('x-addr', '127.0.0.1')
-            req.add_header('X-Requested-With', 'XMLHttpRequest')
+            req.headers['Content-type'] = 'application/x-www-form-urlencoded'
+            req.headers['x-addr'] = '127.0.0.1'
+            req.headers['X-Requested-With'] = 'XMLHttpRequest'
             if user_agent:
-                req.add_header('User-Agent', user_agent)
-            req.add_header('Referer', url)
+                req.headers['User-Agent'] = user_agent
+            req.headers['Referer'] = url

             playlistpage = self._download_json(req, playlist_id, fatal=False)

@@ -183,8 +183,8 @@ def _real_extract(self, url):
             if playlist_url == 'error_region':
                 raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)

-            req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
-            req.add_header('Referer', url)
+            req = Request(compat_urllib_parse_unquote(playlist_url))
+            req.headers['Referer'] = url

             playlist = self._download_json(req, playlist_id, fatal=False)
             if not playlist:
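Note: the `sanitized_Request`/`add_header()` rewrites above reduce to one idea: the new Request class carries a mutable headers mapping. A minimal sketch of the replacement API, assuming yt-dlp 2023.07.06+ (URL and header values are placeholders):

    from yt_dlp.networking import Request

    req = Request('https://www.example.com/api', data=b'key=value')  # bytes body, as with urllib
    req.headers['Content-type'] = 'application/x-www-form-urlencoded'
    req.headers['Referer'] = 'https://www.example.com/'
    # both lines replace the old urllib-style req.add_header(name, value)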
@@ -1,6 +1,6 @@
 import json
-import urllib.error
 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     float_or_none,

@@ -40,7 +40,7 @@ def _real_extract(self, url):
                 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
             })
         except ExtractorError as e:
-            if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
+            if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
                     or isinstance(e.cause, json.JSONDecodeError)):
                 self.raise_login_required(method='cookies')
             raise


@@ -33,7 +33,7 @@ def _real_extract(self, url):
         if rcid:
             webpage = self._download_webpage(url, None, note='Getting video ID')
             url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
-        url = self._request_webpage(url, None, note='Resolving final URL').geturl()
+        url = self._request_webpage(url, None, note='Resolving final URL').url
         mobj = self._match_valid_url(url)
         subdomain = mobj.group('subdomain')
         siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')

@@ -49,7 +49,7 @@ def _real_extract(self, url):
             'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
             video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))

-        if urlh.getcode() == 403:
+        if urlh.status == 403:
             if stream['code'] == 53004:
                 self.raise_login_required()
             if stream['code'] == 53005:

@@ -59,7 +59,7 @@ def _real_extract(self, url):
                     'This video is protected by a password, use the --video-password option', expected=True)
             raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)

-        if urlh.getcode() == 429:
+        if urlh.status == 429:
             self.raise_login_required(
                 f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
                 method='cookies')
@@ -17,15 +17,26 @@
 import sys
 import time
 import types
-import urllib.error
 import urllib.parse
 import urllib.request
 import xml.etree.ElementTree

 from ..compat import functools  # isort: split
-from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..compat import (
+    compat_etree_fromstring,
+    compat_expanduser,
+    compat_os_name,
+    urllib_req_to_req,
+)
 from ..cookies import LenientSimpleCookie
 from ..downloader.f4m import get_base_url, remove_encrypted_media
+from ..downloader.hls import HlsFD
+from ..networking import HEADRequest, Request
+from ..networking.exceptions import (
+    HTTPError,
+    IncompleteRead,
+    network_exceptions,
+)
 from ..utils import (
     IDENTITY,
     JSON_LD_RE,

@@ -34,7 +45,6 @@
     FormatSorter,
     GeoRestrictedError,
     GeoUtils,
-    HEADRequest,
     LenientJSONDecoder,
     Popen,
     RegexNotFoundError,

@@ -60,7 +70,6 @@
     js_to_json,
     mimetype2ext,
     netrc_from_content,
-    network_exceptions,
     orderedSet,
     parse_bitrate,
     parse_codecs,

@@ -70,7 +79,6 @@
     parse_resolution,
     sanitize_filename,
     sanitize_url,
-    sanitized_Request,
     smuggle_url,
     str_or_none,
     str_to_int,

@@ -82,8 +90,6 @@
     unescapeHTML,
     unified_strdate,
     unified_timestamp,
-    update_Request,
-    update_url_query,
     url_basename,
     url_or_none,
     urlhandle_detect_ext,

@@ -224,7 +230,8 @@ class InfoExtractor:
                                  width : height ratio as float.
                     * no_resume  The server does not support resuming the
                                  (HTTP or RTMP) download. Boolean.
-                    * has_drm    The format has DRM and cannot be downloaded. Boolean
+                    * has_drm    True if the format has DRM and cannot be downloaded.
+                                 'maybe' if the format may have DRM and has to be tested before download.
                     * extra_param_to_segment_url  A query string to append to each
                                  fragment's URL, or to update each existing query string
                                  with. Only applied by the native HLS/DASH downloaders.

@@ -726,7 +733,7 @@ def extract(self, url):
             e.ie = e.ie or self.IE_NAME,
             e.traceback = e.traceback or sys.exc_info()[2]
             raise
-        except http.client.IncompleteRead as e:
+        except IncompleteRead as e:
             raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
         except (KeyError, StopIteration) as e:
             raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -785,20 +792,25 @@ def IE_NAME(cls):

     @staticmethod
     def __can_accept_status_code(err, expected_status):
-        assert isinstance(err, urllib.error.HTTPError)
+        assert isinstance(err, HTTPError)
         if expected_status is None:
             return False
         elif callable(expected_status):
-            return expected_status(err.code) is True
+            return expected_status(err.status) is True
         else:
-            return err.code in variadic(expected_status)
+            return err.status in variadic(expected_status)

     def _create_request(self, url_or_request, data=None, headers=None, query=None):
         if isinstance(url_or_request, urllib.request.Request):
-            return update_Request(url_or_request, data=data, headers=headers, query=query)
-        if query:
-            url_or_request = update_url_query(url_or_request, query)
-        return sanitized_Request(url_or_request, data, headers or {})
+            self._downloader.deprecation_warning(
+                'Passing a urllib.request.Request to _create_request() is deprecated. '
+                'Use yt_dlp.networking.common.Request instead.')
+            url_or_request = urllib_req_to_req(url_or_request)
+        elif not isinstance(url_or_request, Request):
+            url_or_request = Request(url_or_request)
+
+        url_or_request.update(data=data, headers=headers, query=query)
+        return url_or_request

     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
         """
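Note: the rewritten `_create_request()` above normalizes every input to the in-house Request type, then patches it in place with `update()`. A sketch of the legacy conversion path it relies on, assuming yt-dlp 2023.07.06+ (`urllib_req_to_req` is the shim imported at the top of this file):

    import urllib.request

    from yt_dlp.compat import urllib_req_to_req

    legacy = urllib.request.Request('https://example.com/?a=1')
    req = urllib_req_to_req(legacy)       # -> yt_dlp.networking.Request
    req.update(headers={'X-Demo': '1'},   # merge headers in place
               query={'b': '2'})          # merged into the URL's query string
    print(req.url, dict(req.headers))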
@@ -834,14 +846,9 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
         try:
             return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
         except network_exceptions as err:
-            if isinstance(err, urllib.error.HTTPError):
+            if isinstance(err, HTTPError):
                 if self.__can_accept_status_code(err, expected_status):
-                    # Retain reference to error to prevent file object from
-                    # being closed before it can be read. Works around the
-                    # effects of <https://bugs.python.org/issue15002>
-                    # introduced in Python 3.4.1.
-                    err.fp._error = err
-                    return err.fp
+                    return err.response

             if errnote is False:
                 return False
@@ -973,11 +980,11 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
         if prefix is not None:
             webpage_bytes = prefix + webpage_bytes
         if self.get_param('dump_intermediate_pages', False):
-            self.to_screen('Dumping request to ' + urlh.geturl())
+            self.to_screen('Dumping request to ' + urlh.url)
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self.get_param('write_pages'):
-            filename = self._request_dump_filename(urlh.geturl(), video_id)
+            filename = self._request_dump_filename(urlh.url, video_id)
             self.to_screen(f'Saving request to {filename}')
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)

@@ -1035,7 +1042,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
         if self.get_param('load_pages'):
             url_or_request = self._create_request(url_or_request, data, headers, query)
-            filename = self._request_dump_filename(url_or_request.full_url, video_id)
+            filename = self._request_dump_filename(url_or_request.url, video_id)
             self.to_screen(f'Loading request from {filename}')
             try:
                 with open(filename, 'rb') as dumpf:

@@ -1109,7 +1116,7 @@ def _download_webpage(
         while True:
             try:
                 return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
-            except http.client.IncompleteRead as e:
+            except IncompleteRead as e:
                 try_count += 1
                 if try_count >= tries:
                     raise e

@@ -1806,7 +1813,7 @@ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=
             return []

         manifest, urlh = res
-        manifest_url = urlh.geturl()
+        manifest_url = urlh.url

         return self._parse_f4m_formats(
             manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,

@@ -1965,7 +1972,7 @@ def _extract_m3u8_formats_and_subtitles(
             return [], {}

         m3u8_doc, urlh = res
-        m3u8_url = urlh.geturl()
+        m3u8_url = urlh.url

         return self._parse_m3u8_formats_and_subtitles(
             m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,

@@ -1979,11 +1986,7 @@ def _parse_m3u8_formats_and_subtitles(
             errnote=None, fatal=True, data=None, headers={}, query={},
             video_id=None):
         formats, subtitles = [], {}
-
-        has_drm = re.search('|'.join([
-            r'#EXT-X-FAXS-CM:',  # Adobe Flash Access
-            r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://',  # Apple FairPlay
-        ]), m3u8_doc)
+        has_drm = HlsFD._has_drm(m3u8_doc)

         def format_url(url):
             return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
|||
return [], {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
smil_url = urlh.url
|
||||
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
|
@ -2270,7 +2273,7 @@ def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
|
|||
return {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
smil_url = urlh.url
|
||||
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
|
@ -2462,7 +2465,7 @@ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
|
|||
return []
|
||||
|
||||
xspf, urlh = res
|
||||
xspf_url = urlh.geturl()
|
||||
xspf_url = urlh.url
|
||||
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
|
@ -2533,7 +2536,7 @@ def _extract_mpd_formats_and_subtitles(
|
|||
return [], {}
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.geturl()
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
|
@ -2919,7 +2922,7 @@ def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, not
|
|||
if ism_doc is None:
|
||||
return [], {}
|
||||
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
|
||||
|
||||
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
|
|
|
@@ -4,7 +4,7 @@
 import time

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     determine_ext,
     float_or_none,

@@ -113,7 +113,7 @@ def _real_extract(self, url):
                     errnote='Unable to download media JSON')
             except ExtractorError as e:
                 # 401 means geo restriction, trying next country
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                     continue
                 raise


@@ -1,7 +1,7 @@
 import base64
-import urllib.error

 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     float_or_none,
@@ -27,11 +27,24 @@ class CrunchyrollBaseIE(InfoExtractor):
     _AUTH_HEADERS = None
     _API_ENDPOINT = None
     _BASIC_AUTH = None
-    _QUERY = {}
+    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
+    _LOCALE_LOOKUP = {
+        'ar': 'ar-SA',
+        'de': 'de-DE',
+        '': 'en-US',
+        'es': 'es-419',
+        'es-es': 'es-ES',
+        'fr': 'fr-FR',
+        'it': 'it-IT',
+        'pt-br': 'pt-BR',
+        'pt-pt': 'pt-PT',
+        'ru': 'ru-RU',
+        'hi': 'hi-IN',
+    }

     @property
     def is_logged_in(self):
-        return self._get_cookies(self._BASE_URL).get('etp_rt')
+        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))

     def _perform_login(self, username, password):
         if self.is_logged_in:
@@ -62,49 +75,49 @@ def _perform_login(self, username, password):
         if not self.is_logged_in:
             raise ExtractorError('Login succeeded but did not set etp_rt cookie')

-    def _update_query(self, lang):
-        if lang in CrunchyrollBaseIE._QUERY:
-            return
-
-        webpage = self._download_webpage(
-            f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')
-
-        initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
-        CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
-            'locale': ('localization', 'locale'),
-        }) or None
-
-        if CrunchyrollBaseIE._BASIC_AUTH:
-            return
-
-        app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
-        cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
-        self.write_debug(f'Using cxApiParam={cx_api_param}')
-        CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
     def _update_auth(self):
         if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
             return

-        assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand'
+        if not CrunchyrollBaseIE._BASIC_AUTH:
+            cx_api_param = self._CLIENT_ID[self.is_logged_in]
+            self.write_debug(f'Using cxApiParam={cx_api_param}')
+            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
+
         grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
         try:
             auth_response = self._download_json(
                 f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
                 headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
         except ExtractorError as error:
             if isinstance(error.cause, HTTPError) and error.cause.status == 403:
                 raise ExtractorError(
                     'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
                     'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
                     'and your browser\'s User-Agent (with --user-agent)', expected=True)
             raise

         CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
         CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)

+    def _locale_from_language(self, language):
+        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
+        return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
+
     def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
-        self._update_query(lang)
         self._update_auth()

         if not endpoint.startswith('/'):
             endpoint = f'/{endpoint}'

+        query = query.copy()
+        locale = self._locale_from_language(lang)
+        if locale:
+            query['locale'] = locale
+
         return self._download_json(
             f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
-            headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query})
+            headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)

     def _call_api(self, path, internal_id, lang, note='api', query={}):
         if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
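Note: in the auth flow above, the Basic credential is just the base64 of `<client_id>:` with an empty secret, and the client ID is picked from `_CLIENT_ID` by login state (a bool indexing the tuple). An illustrative computation using the values from the diff itself:

    import base64

    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    is_logged_in = False                    # anonymous session -> index 0 -> 'cr_web'
    cx_api_param = _CLIENT_ID[is_logged_in]
    basic_auth = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
    print(basic_auth)                       # Basic Y3Jfd2ViOg==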
@@ -114,7 +127,7 @@ def _call_api(self, path, internal_id, lang, note='api', query={}):
             result = self._call_base_api(
                 path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
         except ExtractorError as error:
-            if isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404:
+            if isinstance(error.cause, HTTPError) and error.cause.status == 404:
                 return None
             raise

@@ -206,7 +219,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
     IE_NAME = 'crunchyroll'
     _VALID_URL = r'''(?x)
         https?://(?:beta\.|www\.)?crunchyroll\.com/
-        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
+        (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
         watch/(?!concert|musicvideo)(?P<id>\w+)'''
     _TESTS = [{
         # Premium only

@@ -304,7 +317,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
         },
         'playlist_mincount': 5,
     }, {
-        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
+        'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
         'only_matching': True,
     }, {
         'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',

@@ -490,8 +503,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
     _VALID_URL = r'''(?x)
         https?://(?:www\.)?crunchyroll\.com/
         (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
-        watch/(?P<type>concert|musicvideo)/(?P<id>\w{10})'''
+        watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
     _TESTS = [{
+        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': 'MV5B02C79',
+            'display_id': 'egaono-hana',
+            'title': 'Egaono Hana',
+            'track': 'Egaono Hana',
+            'artist': 'Goose house',
+            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
+            'genre': ['J-Pop'],
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
         'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
         'info_dict': {
             'ext': 'mp4',

@@ -519,11 +545,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
         },
         'params': {'skip_download': 'm3u8'},
     }, {
-        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
+        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
         'only_matching': True,
     }, {
         'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
         'only_matching': True,
+    }, {
+        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
+        'only_matching': True,
     }]
     _API_ENDPOINT = 'music'
@@ -1,10 +1,8 @@
 import time

 from .common import InfoExtractor
-from ..utils import (
-    int_or_none,
-    HEADRequest,
-)
+from ..networking import HEADRequest
+from ..utils import int_or_none


 class CultureUnpluggedIE(InfoExtractor):


@@ -1,9 +1,9 @@
 import hashlib
 import re
 import time
-import urllib.error

 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     classproperty,

@@ -105,7 +105,7 @@ def _real_extract(self, url):
                 formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
             except ExtractorError as e:
                 # CDN will randomly respond with 403
-                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
+                if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                     retry.error = e
                     continue
                 raise
@@ -3,7 +3,7 @@
 import re

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,

@@ -68,9 +68,9 @@ def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
                 None, 'Downloading Access Token',
                 data=urlencode_postdata(data))['access_token']
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                 raise ExtractorError(self._parse_json(
-                    e.cause.read().decode(), xid)['error_description'], expected=True)
+                    e.cause.response.read().decode(), xid)['error_description'], expected=True)
             raise
         self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
         self._HEADERS['Authorization'] = 'Bearer ' + token


@@ -3,8 +3,8 @@

 from .discoverygo import DiscoveryGoBaseIE
 from ..compat import compat_urllib_parse_unquote
+from ..networking.exceptions import HTTPError
 from ..utils import ExtractorError
-from ..compat import compat_HTTPError


 class DiscoveryIE(DiscoveryGoBaseIE):

@@ -100,9 +100,9 @@ def _real_extract(self, url):
                 self._API_BASE_URL + 'streaming/video/' + video_id,
                 display_id, 'Downloading streaming JSON metadata', headers=headers)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
                 e_description = self._parse_json(
-                    e.cause.read().decode(), display_id)['description']
+                    e.cause.response.read().decode(), display_id)['description']
                 if 'resource not available for country' in e_description:
                     self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                 if 'Authorized Networks' in e_description:
@@ -2,7 +2,7 @@
 import uuid

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     determine_ext,
     ExtractorError,

@@ -39,7 +39,7 @@ def _get_auth(self, disco_base, display_id, realm, needs_device_id=True):
         return f'Bearer {token}'

     def _process_errors(self, e, geo_countries):
-        info = self._parse_json(e.cause.read().decode('utf-8'), None)
+        info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
         error = info['errors'][0]
         error_code = error.get('code')
         if error_code == 'access.denied.geoblocked':

@@ -87,7 +87,7 @@ def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domai
                 'include': 'images,primaryChannel,show,tags'
             })
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                 self._process_errors(e, geo_countries)
             raise
         video_id = video['data']['id']

@@ -99,7 +99,7 @@ def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domai
             streaming = self._download_video_playback_info(
                 disco_base, video_id, headers)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                 self._process_errors(e, geo_countries)
             raise
         for format_dict in streaming:
@@ -2,7 +2,7 @@
 import re

 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     int_or_none,

@@ -111,8 +111,8 @@ def _download_json(self, url_or_request, video_id, *args, **kwargs):
             response = super(EaglePlatformIE, self)._download_json(
                 url_or_request, video_id, *args, **kwargs)
         except ExtractorError as ee:
-            if isinstance(ee.cause, compat_HTTPError):
-                response = self._parse_json(ee.cause.read().decode('utf-8'), video_id)
+            if isinstance(ee.cause, HTTPError):
+                response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
                 self._handle_error(response)
             raise
         return response


@@ -1,10 +1,6 @@
 from .common import InfoExtractor
-from ..utils import (
-    float_or_none,
-    int_or_none,
-    parse_iso8601,
-    sanitized_Request,
-)
+from ..networking import Request
+from ..utils import float_or_none, int_or_none, parse_iso8601


 class EitbIE(InfoExtractor):

@@ -54,7 +50,7 @@ def _real_extract(self, url):

         hls_url = media.get('HLS_SURL')
         if hls_url:
-            request = sanitized_Request(
+            request = Request(
                 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
                 headers={'Referer': url})
             token_data = self._download_json(


@@ -52,7 +52,7 @@ def _real_extract(self, url):

         webpage, urlh = self._download_webpage_handle(url, display_id)

-        video_id = self._match_id(urlh.geturl())
+        video_id = self._match_id(urlh.url)

         hash = self._search_regex(
             r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
@@ -41,7 +41,7 @@ def _real_extract(self, url):
             'device': 'desktop',
         })

-        stream_response = self._download_json(player_settings['streamAccess'], video_id, data={})
+        stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')

         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
             stream_response['data']['stream'], video_id, 'mp4')
@@ -8,6 +8,8 @@
     compat_str,
     compat_urllib_parse_unquote,
 )
+from ..networking import Request
+from ..networking.exceptions import network_exceptions
 from ..utils import (
     ExtractorError,
     clean_html,

@@ -19,11 +21,10 @@
     int_or_none,
     js_to_json,
     merge_dicts,
-    network_exceptions,
     parse_count,
     parse_qs,
     qualities,
-    sanitized_Request,
     str_or_none,
     traverse_obj,
     try_get,
     url_or_none,

@@ -90,16 +91,16 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '274175099429670',
             'ext': 'mp4',
-            'title': 'Asif Nawab Butt',
-            'description': 'Asif Nawab Butt',
+            'title': 'Asif',
+            'description': '',
             'uploader': 'Asif Nawab Butt',
             'upload_date': '20140506',
             'timestamp': 1399398998,
             'thumbnail': r're:^https?://.*',
+            'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
+            'duration': 131.03,
+            'concurrent_view_count': int,
         },
-        'expected_warnings': [
-            'title'
-        ]
     }, {
         'note': 'Video with DASH manifest',
         'url': 'https://www.facebook.com/video.php?v=957955867617029',

@@ -151,7 +152,7 @@ class FacebookIE(InfoExtractor):
         # have 1080P, but only up to 720p in swf params
         # data.video.story.attachments[].media
         'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
-        'md5': '3f3798adb2b73423263e59376f1f5eb7',
+        'md5': 'ca63897a90c9452efee5f8c40d080e25',
         'info_dict': {
             'id': '10155529876156509',
             'ext': 'mp4',

@@ -162,6 +163,9 @@ class FacebookIE(InfoExtractor):
             'uploader': 'CNN',
             'thumbnail': r're:^https?://.*',
             'view_count': int,
+            'uploader_id': '100059479812265',
+            'concurrent_view_count': int,
+            'duration': 44.478,
         },
     }, {
         # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall

@@ -170,12 +174,16 @@ class FacebookIE(InfoExtractor):
         'info_dict': {
             'id': '1417995061575415',
             'ext': 'mp4',
-            'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео',
+            'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
             'description': 'Довгоочікуване відео',
-            'timestamp': 1486648771,
+            'timestamp': 1486648217,
             'upload_date': '20170209',
             'uploader': 'Yaroslav Korpan',
-            'uploader_id': '100000948048708',
+            'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
+            'concurrent_view_count': int,
+            'thumbnail': r're:^https?://.*',
+            'view_count': int,
+            'duration': 11736.446,
         },
         'params': {
             'skip_download': True,

@@ -192,9 +200,7 @@ class FacebookIE(InfoExtractor):
             'uploader': 'La Guía Del Varón',
             'thumbnail': r're:^https?://.*',
         },
-        'params': {
-            'skip_download': True,
-        },
+        'skip': 'Requires logging in',
     }, {
         # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
         'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',

@@ -208,9 +214,7 @@ class FacebookIE(InfoExtractor):
             'uploader': 'Elisabeth Ahtn',
             'uploader_id': '100013949973717',
         },
-        'params': {
-            'skip_download': True,
-        },
+        'skip': 'Requires logging in',
     }, {
         'url': 'https://www.facebook.com/video.php?v=10204634152394104',
         'only_matching': True,

@@ -252,7 +256,11 @@ class FacebookIE(InfoExtractor):
             'timestamp': 1527084179,
             'upload_date': '20180523',
             'uploader': 'ESL One Dota 2',
-            'uploader_id': '234218833769558',
+            'uploader_id': '100066514874195',
+            'duration': 4524.212,
+            'view_count': int,
+            'thumbnail': r're:^https?://.*',
+            'concurrent_view_count': int,
         },
         'params': {
             'skip_download': True,

@@ -262,8 +270,17 @@ class FacebookIE(InfoExtractor):
         'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
         'info_dict': {
             'id': '106560053808006',
-            'ext': 'mp4',
-            'title': 'Josef',
+            'thumbnail': r're:^https?://.*',
+            'concurrent_view_count': int,
+            'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
+            'timestamp': 1549275572,
+            'duration': 3.413,
+            'uploader': 'Josef Novak',
+            'description': '',
+            'upload_date': '20190204',
         },
+        'playlist_count': 2,
     }, {
         # data.video.story.attachments[].media
        'url': 'https://www.facebook.com/watch/?v=647537299265662',

@@ -276,6 +293,7 @@ class FacebookIE(InfoExtractor):
             'id': '10157667649866271',
         },
         'playlist_count': 3,
+        'skip': 'Requires logging in',
     }, {
         # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
         'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',

@@ -319,7 +337,7 @@ class FacebookIE(InfoExtractor):
     }

     def _perform_login(self, username, password):
-        login_page_req = sanitized_Request(self._LOGIN_URL)
+        login_page_req = Request(self._LOGIN_URL)
         self._set_cookie('facebook.com', 'locale', 'en_US')
         login_page = self._download_webpage(login_page_req, None,
                                             note='Downloading login page',

@@ -340,8 +358,8 @@ def _perform_login(self, username, password):
             'timezone': '-60',
             'trynum': '1',
         }
-        request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
-        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
+        request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
         try:
             login_results = self._download_webpage(request, None,
                                                    note='Logging in', errnote='unable to fetch login page')

@@ -367,8 +385,8 @@ def _perform_login(self, username, password):
                 'h': h,
                 'name_action_selected': 'dont_save',
             }
-            check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
-            check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
+            check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
             check_response = self._download_webpage(check_req, None,
                                                     note='Confirming login')
             if re.search(r'id="checkpointSubmitButton"', check_response) is not None:

@@ -497,6 +515,13 @@ def extract_relay_prefetched_data(_filter):
             entries = []

             def parse_graphql_video(video):
+                v_id = video.get('videoId') or video.get('id') or video_id
+                reel_info = traverse_obj(
+                    video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
+                if reel_info:
+                    video = video['creation_story']
+                    video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
+                    video.update(reel_info)
                 formats = []
                 q = qualities(['sd', 'hd'])
                 for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),

@@ -513,15 +538,15 @@ def parse_graphql_video(video):
                         'url': playable_url,
                     })
                 extract_dash_manifest(video, formats)
-                v_id = video.get('videoId') or video.get('id') or video_id
                 info = {
                     'id': v_id,
                     'formats': formats,
                     'thumbnail': traverse_obj(
                         video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
                     'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||
'timestamp': int_or_none(video.get('publish_time')),
|
||||
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||
'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
|
||||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
|
@ -782,18 +807,18 @@ class FacebookReelIE(InfoExtractor):
|
|||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/reel/1195289147628387',
|
||||
'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831',
|
||||
'md5': 'f13dd37f2633595982db5ed8765474d3',
|
||||
'info_dict': {
|
||||
'id': '1195289147628387',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:9f5b142921b2dc57004fa13f76005f87',
|
||||
'description': 'md5:24ea7ef062215d295bdde64e778f5474',
|
||||
'uploader': 'Beast Camp Training',
|
||||
'uploader_id': '1738535909799870',
|
||||
'duration': 9.536,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
|
||||
'description': 'md5:22f03309b216ac84720183961441d8db',
|
||||
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
|
||||
'uploader_id': '100040874179269',
|
||||
'duration': 9.579,
|
||||
'timestamp': 1637502609,
|
||||
'upload_date': '20211121',
|
||||
'timestamp': 1637502604,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
}
|
||||
}]
|
||||
|
||||
|
|
|
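The hunks above show the request-construction migration that repeats across the files below: yt-dlp's own Request class from yt_dlp.networking replaces utils.sanitized_Request, and headers are set through the headers mapping instead of add_header(). A minimal sketch of the pattern, assuming yt-dlp 2023.07.06 or later; the URL and form data are placeholders, not taken from any extractor:

    from yt_dlp.networking import Request
    from yt_dlp.utils import urlencode_postdata

    # placeholder form data; real extractors build this from the login page
    login_form = {'email': 'user@example.com', 'pass': 'example-password'}

    # old style (removed by this diff):
    #     request = sanitized_Request(url, urlencode_postdata(login_form))
    #     request.add_header('Content-Type', 'application/x-www-form-urlencoded')
    request = Request('https://example.com/login', urlencode_postdata(login_form))
    request.headers['Content-Type'] = 'application/x-www-form-urlencoded'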
@@ -3,11 +3,11 @@
from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..dependencies import websockets
from ..networking import Request
from ..utils import (
ExtractorError,
WebSocketsWrapper,
js_to_json,
sanitized_Request,
traverse_obj,
update_url_query,
urlencode_postdata,
@@ -57,7 +57,7 @@ def _login(self):
}

login_data = urlencode_postdata(login_form_strs)
request = sanitized_Request(
request = Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)

login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@@ -66,7 +66,7 @@ def _login(self):
return False

# this is also needed
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done')
login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed')
@@ -1,8 +1,6 @@
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
qualities,
strip_or_none,
@@ -40,8 +38,8 @@ def _real_extract(self, url):
'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
video_id)['response']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason']
if isinstance(e.cause, HTTPError):
errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
raise

@@ -124,8 +122,8 @@ def _real_extract(self, url):
channel_data = self._download_json(
'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError):
errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message']
if isinstance(e.cause, HTTPError):
errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
raise
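The error-handling side of the migration follows the same shape in the hunks above and below: compat_HTTPError gives way to HTTPError from yt_dlp.networking.exceptions, which exposes .status instead of .code and keeps the body behind .response. A hedged sketch of the pattern; call_api stands in for whatever API call the extractor wraps:

    from yt_dlp.networking.exceptions import HTTPError
    from yt_dlp.utils import ExtractorError


    def error_reason(call_api):
        # call_api is any callable that may raise ExtractorError around an HTTP failure
        try:
            return call_api()
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                # the body now lives on the wrapped response object
                return e.cause.response.read().decode()
            raise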
@@ -3,10 +3,10 @@

from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_parse_unquote,
)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -68,9 +68,9 @@ def _call_api(self, path, video_id, data=None):
'https://api3.fox.com/v2.0/' + path,
video_id, data=data, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
entitlement_issues = self._parse_json(
e.cause.read().decode(), video_id)['entitlementIssues']
e.cause.response.read().decode(), video_id)['entitlementIssues']
for e in entitlement_issues:
if e.get('errorCode') == 1005:
raise ExtractorError(
@@ -123,8 +123,8 @@ def _real_extract(self, url):
try:
m3u8_url = self._download_json(release_url, video_id)['playURL']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read().decode(), video_id)
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read().decode(), video_id)
if error.get('exception') == 'GeoLocationBlocked':
self.raise_geo_restricted(countries=['US'])
raise ExtractorError(error['description'], expected=True)
@@ -1,6 +1,7 @@
from .common import InfoExtractor
from .uplynk import UplynkPreplayIE
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
from ..networking import HEADRequest
from ..utils import float_or_none, make_archive_id, smuggle_url


class FoxSportsIE(InfoExtractor):
@@ -35,7 +36,7 @@ def _real_extract(self, url):
'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
})
preplay_url = self._request_webpage(
HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()
HEADRequest(data['url']), video_id, 'Fetching preplay URL').url

return {
'_type': 'url_transparent',
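Two smaller changes travel together here and in many later hunks: HEADRequest now comes from yt_dlp.networking, and response handles expose the final URL as a .url attribute instead of the urllib-style .geturl() method. A sketch, with request_webpage standing in for the extractor's self._request_webpage:

    from yt_dlp.networking import HEADRequest


    def resolve_final_url(request_webpage, url, video_id):
        # issue a HEAD request and read the post-redirect URL off the handle
        urlh = request_webpage(HEADRequest(url), video_id, 'Resolving final URL')
        return urlh.url  # old style: urlh.geturl()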
@@ -1,5 +1,5 @@
from ..utils import HEADRequest
from .common import InfoExtractor
from ..networking import HEADRequest


class FujiTVFODPlus7IE(InfoExtractor):
@@ -3,7 +3,7 @@
import string

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
@@ -46,8 +46,8 @@ def _perform_login(self, username, password):
}))
FunimationBaseIE._TOKEN = data['token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)['error']
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
error = self._parse_json(e.cause.response.read().decode(), None)['error']
raise ExtractorError(error, expected=True)
raise
@@ -2,13 +2,8 @@

from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
HEADRequest,
remove_start,
sanitized_Request,
smuggle_url,
urlencode_postdata,
)
from ..networking import HEADRequest, Request
from ..utils import remove_start, smuggle_url, urlencode_postdata


class GDCVaultIE(InfoExtractor):
@@ -138,8 +133,8 @@ def _login(self, webpage_url, display_id):
'password': password,
}

request = sanitized_Request(login_url, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
request = Request(login_url, urlencode_postdata(login_form))
request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
self._download_webpage(logout_url, display_id, 'Logging out')
@@ -163,7 +158,7 @@ def _real_extract(self, url):
video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension
video_url = self._request_webpage(
HEADRequest(video_url), video_id).geturl()
HEADRequest(video_url), video_id).url

return {
'id': video_id,
@@ -2431,7 +2431,7 @@ def _real_extract(self, url):
'Accept-Encoding': 'identity',
**smuggled_data.get('http_headers', {})
})
new_url = full_response.geturl()
new_url = full_response.url
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url)
@@ -2529,12 +2529,12 @@ def _real_extract(self, url):
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=full_response.geturl()),
xspf_base_url=full_response.url),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_base_url=full_response.url.rpartition('/')[0],
mpd_url=url)
self._extra_manifest_info(info_dict, url)
self.report_detected('DASH manifest')
@@ -2562,7 +2562,7 @@ def _real_extract(self, url):
self._downloader.write_debug('Looking for embeds')
embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
if len(embeds) == 1:
return {**info_dict, **embeds[0]}
return merge_dicts(embeds[0], info_dict)
elif embeds:
return self.playlist_result(embeds, **info_dict)
raise UnsupportedError(url)
@@ -2572,7 +2572,7 @@ def _extract_embeds(self, url, webpage, *, urlh=None, info_dict={}):
info_dict = types.MappingProxyType(info_dict)  # Prevents accidental mutation
video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
url, smuggled_data = unsmuggle_url(url, {})
actual_url = urlh.geturl() if urlh else url
actual_url = urlh.url if urlh else url

# Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
@@ -8,8 +8,8 @@
from ..compat import (
compat_str,
)
from ..networking import HEADRequest
from ..utils import (
HEADRequest,
ExtractorError,
float_or_none,
orderedSet,
@@ -5,7 +5,9 @@
from ..utils import (
ExtractorError,
determine_ext,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
int_or_none,
lowercase_escape,
try_get,
@@ -34,6 +36,7 @@ class GoogleDriveIE(InfoExtractor):
'ext': 'mp4',
'title': 'Big Buck Bunny.mp4',
'duration': 45,
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
}
}, {
# video can't be watched anonymously due to view count limit reached,
@@ -207,10 +210,10 @@ def get_value(key):
'export': 'download',
})

def request_source_file(source_url, kind):
def request_source_file(source_url, kind, data=None):
return self._request_webpage(
source_url, video_id, note='Requesting %s file' % kind,
errnote='Unable to request %s file' % kind, fatal=False)
errnote='Unable to request %s file' % kind, fatal=False, data=data)
urlh = request_source_file(source_url, 'source')
if urlh:
def add_source_format(urlh):
@@ -225,7 +228,7 @@ def add_source_format(urlh):
# Using original URLs may result in redirect loop due to
# google.com's cookies mistakenly used for googleusercontent.com
# redirect URLs (see #23919).
'url': urlh.geturl(),
'url': urlh.url,
'ext': determine_ext(title, 'mp4').lower(),
'format_id': 'source',
'quality': 1,
@@ -237,14 +240,10 @@ def add_source_format(urlh):
urlh, url, video_id, note='Downloading confirmation page',
errnote='Unable to confirm download', fatal=False)
if confirmation_webpage:
confirm = self._search_regex(
r'confirm=([^&"\']+)', confirmation_webpage,
'confirmation code', default=None)
if confirm:
confirmed_source_url = update_url_query(source_url, {
'confirm': confirm,
})
urlh = request_source_file(confirmed_source_url, 'confirmed source')
confirmed_source_url = extract_attributes(
get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
if confirmed_source_url:
urlh = request_source_file(confirmed_source_url, 'confirmed source', data=b'')
if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh)
else:
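The Google Drive hunk above replaces the old confirm= token scraping with a POST to the action URL of the confirmation page's download-form; passing data=b'' is what turns the follow-up request into a POST. A rough sketch of the new flow under that reading; the sample HTML is made up:

    from yt_dlp.utils import extract_attributes, get_element_html_by_id

    # made-up confirmation page for illustration
    confirmation_webpage = (
        '<form id="download-form" method="post"'
        ' action="https://drive.usercontent.google.com/download?id=FILE_ID&confirm=t"></form>')

    confirmed_source_url = extract_attributes(
        get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
    # request_source_file(confirmed_source_url, 'confirmed source', data=b'') then
    # checks the Content-Disposition header before trusting the response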
@@ -126,7 +126,7 @@ def _real_extract(self, url):
# If we ever wanted to provide the final resolved URL that
# does not require cookies, albeit with a shorter lifespan:
# urlh = self._downloader.urlopen(file_url)
# resolved_url = urlh.geturl()
# resolved_url = urlh.url
label = fmt.get('label')
h = self._FORMAT_HEIGHTS.get(label)
w = h * width // height if h and width and height else None
@@ -1,11 +1,7 @@
from .common import InfoExtractor
from ..compat import compat_b64decode
from ..utils import (
ExtractorError,
HEADRequest,
sanitized_Request,
urlencode_postdata,
)
from ..networking import HEADRequest, Request
from ..utils import ExtractorError, urlencode_postdata


class HotNewHipHopIE(InfoExtractor):
@@ -36,9 +32,9 @@ def _real_extract(self, url):
('mediaType', 's'),
('mediaId', video_id),
])
r = sanitized_Request(
r = Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded')
r.headers['Content-Type'] = 'application/x-www-form-urlencoded'
mkd = self._download_json(
r, video_id, note='Requesting media key',
errnote='Could not download media key')
@@ -50,7 +46,7 @@ def _real_extract(self, url):
req = self._request_webpage(
redirect_req, video_id,
note='Resolving final URL', errnote='Could not resolve final URL')
video_url = req.geturl()
video_url = req.url
if video_url.endswith('.html'):
raise ExtractorError('Redirect failed')
@@ -6,7 +6,8 @@
import uuid

from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_str
from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
@@ -233,7 +234,7 @@ def _real_extract(self, url):
'height': int_or_none(playback_set.get('height')),
}]
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
geo_restricted = True
continue
@@ -1,13 +1,13 @@
import json

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
clean_html,
ExtractorError,
int_or_none,
parse_age_limit,
sanitized_Request,
try_get,
)

@@ -42,7 +42,7 @@ def _initialize_pre_login(self):
'application_version': self._APP_VERSION
}

req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
req = Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
req.get_method = lambda: 'PUT'

resources = self._download_json(
@@ -73,8 +73,8 @@ def _perform_login(self, username, password):
self._login_url, None, note='Logging in', errnote='Unable to log in',
data=json.dumps(auth_data).encode('utf-8'))
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406:
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None)
if isinstance(e.cause, HTTPError) and e.cause.status == 406:
auth_info = self._parse_json(e.cause.response.read().encode('utf-8'), None)
else:
raise
@@ -1,8 +1,9 @@
import re
import urllib.error
import urllib.parse

from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
@@ -27,9 +28,9 @@ def _checked_call_api(self, slug):
try:
return self._call_api(slug)
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
e.cause.response.url, e.cause.status, e.cause.reason]
raise ExtractorError(
'Content not found: expired?', cause=e.cause,
expected=True)
@@ -226,7 +227,7 @@ def _real_extract(self, url):
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))

webpage, urlh = self._download_webpage_handle(embed_url, video_id)
new_url = urlh.geturl()
new_url = urlh.url
ign_url = compat_parse_qs(
urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
if ign_url:
@@ -323,14 +324,14 @@ def _checked_call_api(self, slug):
try:
return self._call_api(slug)
except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError):
if isinstance(e.cause, HTTPError):
e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
if e.cause.code == 404:
e.cause.response.url, e.cause.status, e.cause.reason]
if e.cause.status == 404:
raise ExtractorError(
'Content not found: expired?', cause=e.cause,
expected=True)
elif e.cause.code == 503:
elif e.cause.status == 503:
self.report_warning(error_to_compat_str(e.cause))
return
raise
@@ -1,7 +1,7 @@
import json

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -52,9 +52,9 @@ def _extract_dve_api_url(self, media_id, media_type):
return self._call_api(
stream_path, media_id)['playerUrlCallback']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
raise ExtractorError(
self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
self._parse_json(e.cause.response.read().decode(), media_id)['messages'][0],
expected=True)
raise
@@ -3,9 +3,9 @@
import json
import re
import time
import urllib.error

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
decode_base_n,
@@ -442,7 +442,7 @@ def _real_extract(self, url):
shared_data = self._search_json(
r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}

if shared_data and self._LOGIN_URL not in urlh.geturl():
if shared_data and self._LOGIN_URL not in urlh.url:
media.update(traverse_obj(
shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
@@ -589,7 +589,7 @@ def _extract_graphql(self, data, url):
except ExtractorError as e:
# if it's an error caused by a bad query, and there are
# more GIS templates to try, ignore it and keep trying
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
if gis_tmpl != gis_tmpls[-1]:
continue
raise
@@ -81,7 +81,7 @@ def _perform_login(self, username, password):
note='Logging in')

# a profile may need to be selected first, even when there is only a single one
if '/profile-select' in login_handle.geturl():
if '/profile-select' in login_handle.url:
profile_id = self._search_regex(
r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')

@@ -89,7 +89,7 @@ def _perform_login(self, username, password):
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')

code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0))
code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
if not code:
raise ExtractorError('Login failed', expected=True)
@@ -527,7 +527,7 @@ def _extract_vms_player_js(self, webpage, video_id):
if player_js_cache:
return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex(
r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
webpack_map = self._search_json(
r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
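The iqiyi hunk above fixes two latent bugs in one regex: the optional scheme group was missing its colon, so (?:https?)?// could never match an https:// URL in the anchored context, and the unescaped dots in the hostname matched any character. A small check against a made-up script tag:

    import re

    old = r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"'
    new = r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"'

    html = '<script src="https://stc.iqiyipic.com/_next/static/chunks/webpack-abc123.js">'
    assert not re.search(old, html)  # 'https' is never followed directly by '//'
    assert re.search(new, html)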
@@ -1,5 +1,5 @@
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
@@ -101,8 +101,8 @@ def _real_extract(self, url):
cdn_api_base, video_id, query=query,
note='Downloading video URL for profile %s' % profile_name)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
resp = self._parse_json(e.cause.read().decode(), video_id)
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
resp = self._parse_json(e.cause.response.read().decode(), video_id)
if resp.get('code') == 'GeoBlocked':
self.raise_geo_restricted()
raise
@@ -1,7 +1,6 @@
from .common import InfoExtractor

from ..networking import HEADRequest
from ..utils import (
HEADRequest,
UserNotLive,
float_or_none,
merge_dicts,
@@ -30,7 +29,7 @@ def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, *


class KickIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)'
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://kick.com/yuppy',
'info_dict': {
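One byte of the Kick change is easy to miss: in the _VALID_URL character class, \w already covers underscores, so [\w_] was redundant while still rejecting hyphenated channel names; [\w-] accepts them. A quick illustration with a made-up slug:

    import re

    # '\w' already matches letters, digits and '_'; adding '-' admits hyphenated slugs
    assert re.fullmatch(r'[\w-]+', 'some-channel_123')
    assert not re.fullmatch(r'[\w_]+', 'some-channel_123')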
@@ -91,7 +91,7 @@ def _real_extract(self, url):
webpage, urlh = self._download_webpage_handle(
url, song_id, note='Download song detail info',
errnote='Unable to get song detail info')
if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
if song_id not in urlh.url or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
raise ExtractorError('this song has been offline because of copyright issues', expected=True)

song_name = self._html_search_regex(
@@ -1,13 +1,8 @@
import re

from .common import InfoExtractor
from ..utils import (
float_or_none,
HEADRequest,
int_or_none,
parse_duration,
unified_strdate,
)
from ..networking import HEADRequest
from ..utils import float_or_none, int_or_none, parse_duration, unified_strdate


class LA7IE(InfoExtractor):
@@ -3,9 +3,9 @@
import urllib.parse

from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
HEADRequest,
OnDemandPagedList,
UnsupportedError,
determine_ext,
@@ -266,7 +266,7 @@ def _real_extract(self, url):
# HEAD request returns redirect response to m3u8 URL if available
final_url = self._request_webpage(
HEADRequest(streaming_url), display_id, headers=headers,
note='Downloading streaming redirect url info').geturl()
note='Downloading streaming redirect url info').url

elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True
Some files were not shown because too many files have changed in this diff.