mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-03-13 18:24:05 +00:00
Merge remote-tracking branch 'origin' into yt-live-from-start-range
This commit is contained in:
commit
2741b5827d
323 changed files with 13049 additions and 4722 deletions
8
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
8
.github/ISSUE_TEMPLATE/1_broken_site.yml
vendored
|
@ -18,7 +18,7 @@ body:
|
|||
options:
|
||||
- label: I'm reporting that yt-dlp is broken on a **supported** site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
|
@ -64,7 +64,7 @@ body:
|
|||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
|
@ -72,8 +72,8 @@ body:
|
|||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2023.06.22, Current version: 2023.06.22
|
||||
yt-dlp is up to date (2023.06.22)
|
||||
Latest version: 2023.10.07, Current version: 2023.10.07
|
||||
yt-dlp is up to date (2023.10.07)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
|
|
@ -18,7 +18,7 @@ body:
|
|||
options:
|
||||
- label: I'm reporting a new site support request
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
|
@ -76,7 +76,7 @@ body:
|
|||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
|
@ -84,8 +84,8 @@ body:
|
|||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2023.06.22, Current version: 2023.06.22
|
||||
yt-dlp is up to date (2023.06.22)
|
||||
Latest version: 2023.10.07, Current version: 2023.10.07
|
||||
yt-dlp is up to date (2023.10.07)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
|
|
@ -18,7 +18,7 @@ body:
|
|||
options:
|
||||
- label: I'm requesting a site-specific feature
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
|
@ -72,7 +72,7 @@ body:
|
|||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
|
@ -80,8 +80,8 @@ body:
|
|||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2023.06.22, Current version: 2023.06.22
|
||||
yt-dlp is up to date (2023.06.22)
|
||||
Latest version: 2023.10.07, Current version: 2023.10.07
|
||||
yt-dlp is up to date (2023.10.07)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
|
8
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
8
.github/ISSUE_TEMPLATE/4_bug_report.yml
vendored
|
@ -18,7 +18,7 @@ body:
|
|||
options:
|
||||
- label: I'm reporting a bug unrelated to a specific site
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
|
||||
required: true
|
||||
|
@ -57,7 +57,7 @@ body:
|
|||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
|
@ -65,8 +65,8 @@ body:
|
|||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2023.06.22, Current version: 2023.06.22
|
||||
yt-dlp is up to date (2023.06.22)
|
||||
Latest version: 2023.10.07, Current version: 2023.10.07
|
||||
yt-dlp is up to date (2023.10.07)
|
||||
<more lines>
|
||||
render: shell
|
||||
validations:
|
||||
|
|
8
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
8
.github/ISSUE_TEMPLATE/5_feature_request.yml
vendored
|
@ -20,7 +20,7 @@ body:
|
|||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
|
||||
required: true
|
||||
|
@ -53,7 +53,7 @@ body:
|
|||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
|
@ -61,7 +61,7 @@ body:
|
|||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2023.06.22, Current version: 2023.06.22
|
||||
yt-dlp is up to date (2023.06.22)
|
||||
Latest version: 2023.10.07, Current version: 2023.10.07
|
||||
yt-dlp is up to date (2023.10.07)
|
||||
<more lines>
|
||||
render: shell
|
||||
|
|
8
.github/ISSUE_TEMPLATE/6_question.yml
vendored
8
.github/ISSUE_TEMPLATE/6_question.yml
vendored
|
@ -26,7 +26,7 @@ body:
|
|||
required: true
|
||||
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
|
||||
required: true
|
||||
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
- label: I've verified that I'm running yt-dlp version **2023.10.07** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
|
||||
required: true
|
||||
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
|
||||
required: true
|
||||
|
@ -59,7 +59,7 @@ body:
|
|||
[debug] Command-line config: ['-vU', 'test:youtube']
|
||||
[debug] Portable config "yt-dlp.conf": ['-i']
|
||||
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
|
||||
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe)
|
||||
[debug] yt-dlp version 2023.10.07 [9d339c4] (win32_exe)
|
||||
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
|
||||
[debug] Checking exe version: ffmpeg -bsfs
|
||||
[debug] Checking exe version: ffprobe -bsfs
|
||||
|
@ -67,7 +67,7 @@ body:
|
|||
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
|
||||
[debug] Proxy map: {}
|
||||
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
|
||||
Latest version: 2023.06.22, Current version: 2023.06.22
|
||||
yt-dlp is up to date (2023.06.22)
|
||||
Latest version: 2023.10.07, Current version: 2023.10.07
|
||||
yt-dlp is up to date (2023.10.07)
|
||||
<more lines>
|
||||
render: shell
|
||||
|
|
65
.github/workflows/codeql.yml
vendored
Normal file
65
.github/workflows/codeql.yml
vendored
Normal file
|
@ -0,0 +1,65 @@
|
|||
name: "CodeQL"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ 'master', 'gh-pages', 'release' ]
|
||||
pull_request:
|
||||
# The branches below must be a subset of the branches above
|
||||
branches: [ 'master' ]
|
||||
schedule:
|
||||
- cron: '59 11 * * 5'
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
language: [ 'python' ]
|
||||
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
|
||||
# Use only 'java' to analyze code written in Java, Kotlin or both
|
||||
# Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
|
||||
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
# By default, queries listed here will override any specified in a config file.
|
||||
# Prefix the list here with "+" to use these queries and those in the config file.
|
||||
|
||||
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
|
||||
# queries: security-extended,security-and-quality
|
||||
|
||||
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
|
||||
# If the Autobuild fails above, remove it and uncomment the following three lines.
|
||||
# modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
|
||||
|
||||
# - run: |
|
||||
# echo "Run, Build Application using script"
|
||||
# ./location_of_script_within_repo/buildscript.sh
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
5
.github/workflows/core.yml
vendored
5
.github/workflows/core.yml
vendored
|
@ -13,13 +13,16 @@ jobs:
|
|||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
# CPython 3.11 is in quick-test
|
||||
python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8]
|
||||
python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# atleast one of each CPython/PyPy tests must be in windows
|
||||
- os: windows-latest
|
||||
python-version: '3.7'
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: '3.12'
|
||||
run-tests-ext: bat
|
||||
- os: windows-latest
|
||||
python-version: pypy-3.9
|
||||
run-tests-ext: bat
|
||||
|
|
2
.github/workflows/download.yml
vendored
2
.github/workflows/download.yml
vendored
|
@ -28,7 +28,7 @@ jobs:
|
|||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8]
|
||||
python-version: ['3.7', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
|
||||
run-tests-ext: [sh]
|
||||
include:
|
||||
# atleast one of each CPython/PyPy tests must be in windows
|
||||
|
|
|
@ -217,7 +217,7 @@ After you have ensured this site is distributing its content legally, you can fo
|
|||
1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
|
||||
1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
|
||||
1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
|
||||
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
|
||||
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
|
||||
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
|
||||
$ flake8 yt_dlp/extractor/yourextractor.py
|
||||
|
@ -251,7 +251,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
|
|||
|
||||
### Mandatory and optional metafields
|
||||
|
||||
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
|
||||
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
|
||||
|
||||
- `id` (media identifier)
|
||||
- `title` (media title)
|
||||
|
@ -696,7 +696,7 @@ formats = [
|
|||
|
||||
### Use convenience conversion and parsing functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
|
@ -704,7 +704,7 @@ Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe
|
|||
|
||||
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
|
||||
|
||||
Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions.
|
||||
Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.
|
||||
|
||||
#### Examples
|
||||
|
||||
|
|
51
CONTRIBUTORS
51
CONTRIBUTORS
|
@ -2,7 +2,6 @@ pukkandan (owner)
|
|||
shirt-dev (collaborator)
|
||||
coletdjnz/colethedj (collaborator)
|
||||
Ashish0804 (collaborator)
|
||||
nao20010128nao/Lesmiscore (collaborator)
|
||||
bashonly (collaborator)
|
||||
Grub4K (collaborator)
|
||||
h-h-h-h
|
||||
|
@ -460,3 +459,53 @@ berkanteber
|
|||
OverlordQ
|
||||
rexlambert22
|
||||
Ti4eeT4e
|
||||
AmanSal1
|
||||
bbilly1
|
||||
meliber
|
||||
nnoboa
|
||||
rdamas
|
||||
RfadnjdExt
|
||||
urectanc
|
||||
nao20010128nao/Lesmiscore
|
||||
04-pasha-04
|
||||
aaruni96
|
||||
aky-01
|
||||
AmirAflak
|
||||
ApoorvShah111
|
||||
at-wat
|
||||
davinkevin
|
||||
demon071
|
||||
denhotte
|
||||
FinnRG
|
||||
fireattack
|
||||
Frankgoji
|
||||
GD-Slime
|
||||
hatsomatt
|
||||
ifan-t
|
||||
kshitiz305
|
||||
kylegustavo
|
||||
mabdelfattah
|
||||
nathantouze
|
||||
niemands
|
||||
Rajeshwaran2001
|
||||
RedDeffender
|
||||
Rohxn16
|
||||
sb0stn
|
||||
SevenLives
|
||||
simon300000
|
||||
snixon
|
||||
soundchaser128
|
||||
szabyg
|
||||
trainman261
|
||||
trislee
|
||||
wader
|
||||
Yalab7
|
||||
zhallgato
|
||||
zhong-yiyu
|
||||
Zprokkel
|
||||
AS6939
|
||||
drzraf
|
||||
handlerug
|
||||
jiru
|
||||
madewokherd
|
||||
xofe
|
||||
|
|
283
Changelog.md
283
Changelog.md
|
@ -4,11 +4,290 @@
|
|||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2023.10.07
|
||||
|
||||
#### Extractor changes
|
||||
- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe)
|
||||
- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd)
|
||||
- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly)
|
||||
- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939)
|
||||
- **lbry**
|
||||
- [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K)
|
||||
- [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf)
|
||||
- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru)
|
||||
- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317)
|
||||
- **radiko**: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by [garret1317](https://github.com/garret1317)
|
||||
- **substack**
|
||||
- [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug)
|
||||
- [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug)
|
||||
- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera)
|
||||
- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly)
|
||||
- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **youtube**
|
||||
- [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly)
|
||||
- [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
#### Misc. changes
|
||||
- **cleanup**
|
||||
- [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261)
|
||||
- Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K)
|
||||
|
||||
### 2023.09.24
|
||||
|
||||
#### Important changes
|
||||
- **The minimum *recommended* Python version has been raised to 3.8**
|
||||
Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)
|
||||
- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)
|
||||
- The shell escape function is now using `""` instead of `\"`.
|
||||
- `utils.Popen` has been patched to properly quote commands.
|
||||
|
||||
#### Core changes
|
||||
- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
|
||||
- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K)
|
||||
- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan)
|
||||
- **compat**
|
||||
- [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd))
|
||||
- [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
|
||||
- [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan)
|
||||
- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly)
|
||||
- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly)
|
||||
- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan)
|
||||
- **utils**
|
||||
- [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly)
|
||||
- HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan)
|
||||
- `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah)
|
||||
- `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move)
|
||||
|
||||
#### Extractor changes
|
||||
- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
|
||||
- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move)
|
||||
- **abematv**
|
||||
- [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives)
|
||||
- [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan)
|
||||
- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg)
|
||||
- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos)
|
||||
- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly)
|
||||
- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick)
|
||||
- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305)
|
||||
- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317)
|
||||
- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick)
|
||||
- **bilibili**
|
||||
- [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt)
|
||||
- [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22)
|
||||
- [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime)
|
||||
- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0)
|
||||
- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld)
|
||||
- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261)
|
||||
- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261)
|
||||
- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317)
|
||||
- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly))
|
||||
- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo)
|
||||
- **facebook**
|
||||
- [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1)
|
||||
- [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1)
|
||||
- [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack)
|
||||
- reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071)
|
||||
- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20)
|
||||
- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04)
|
||||
- **generic**
|
||||
- [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan)
|
||||
- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly)
|
||||
- **hotstar**
|
||||
- [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001)
|
||||
- [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly)
|
||||
- [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly)
|
||||
- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7)
|
||||
- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01)
|
||||
- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move)
|
||||
- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly)
|
||||
- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000)
|
||||
- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly)
|
||||
- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn)
|
||||
- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16)
|
||||
- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien)
|
||||
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato)
|
||||
- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly)
|
||||
- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317)
|
||||
- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317)
|
||||
- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak)
|
||||
- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly)
|
||||
- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly)
|
||||
- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png)
|
||||
- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly)
|
||||
- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111)
|
||||
- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly))
|
||||
- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move)
|
||||
- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly)
|
||||
- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon)
|
||||
- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG)
|
||||
- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji)
|
||||
- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands)
|
||||
- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu)
|
||||
- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0)
|
||||
- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000)
|
||||
- **reddit**
|
||||
- [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly)
|
||||
- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly)
|
||||
- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128)
|
||||
- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee)
|
||||
- **s4c**
|
||||
- [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t)
|
||||
- [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t)
|
||||
- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt)
|
||||
- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly)
|
||||
- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera)
|
||||
- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader)
|
||||
- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317)
|
||||
- **tiktok**
|
||||
- [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly)
|
||||
- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly)
|
||||
- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli)
|
||||
- **twitcasting**
|
||||
- [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt)
|
||||
- [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat)
|
||||
- **twitter**
|
||||
- [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly)
|
||||
- [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan)
|
||||
- spaces
|
||||
- [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly)
|
||||
- [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly)
|
||||
- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan)
|
||||
- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt)
|
||||
- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel)
|
||||
- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin)
|
||||
- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg)
|
||||
- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly)
|
||||
- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev)
|
||||
- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa)
|
||||
- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly)
|
||||
- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Downloader changes
|
||||
- **external**
|
||||
- [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Postprocessor changes
|
||||
- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic)
|
||||
|
||||
#### Networking changes
|
||||
- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- **Request Handler**
|
||||
- urllib
|
||||
- [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a))
|
||||
|
||||
#### Misc. changes
|
||||
- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan)
|
||||
- **cleanup**
|
||||
- [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
|
||||
- Miscellaneous
|
||||
- [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan)
|
||||
- [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT)
|
||||
- [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K)
|
||||
- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K)
|
||||
- **test**
|
||||
- [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan)
|
||||
- download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat)
|
||||
|
||||
### 2023.07.06
|
||||
|
||||
#### Important changes
|
||||
- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)
|
||||
- `--add-header Cookie:` is deprecated and auto-scoped to input URL domains
|
||||
- Cookies are scoped when passed to external downloaders
|
||||
- Add `cookies` field to info.json and deprecate `http_headers.Cookie`
|
||||
|
||||
#### Core changes
|
||||
- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K)
|
||||
- [Prevent `Cookie` leaks on HTTP redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan)
|
||||
- **outtmpl**: [Pad `playlist_index` etc even with internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) by [pukkandan](https://github.com/pukkandan)
|
||||
- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
#### Extractor changes
|
||||
- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber)
|
||||
- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa)
|
||||
- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas)
|
||||
- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly)
|
||||
- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt)
|
||||
- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly)
|
||||
- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly)
|
||||
- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan)
|
||||
- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc)
|
||||
- **twitter**
|
||||
- [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly)
|
||||
- spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly)
|
||||
- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan)
|
||||
- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **youtube**
|
||||
- [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1)
|
||||
- [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan)
|
||||
- stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan)
|
||||
- tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
#### Downloader changes
|
||||
- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan)
|
||||
- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
|
||||
- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
#### Misc. changes
|
||||
- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf)
|
||||
- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan)
|
||||
- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan)
|
||||
- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
### 2023.06.22
|
||||
|
||||
#### Core changes
|
||||
- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan)
|
||||
- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan)
|
||||
- Support negative time-ranges
|
||||
- Add `*from-url` to obey time-ranges in URL
|
||||
- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
#### Extractor changes
|
||||
|
@ -19,7 +298,7 @@
|
|||
- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb)
|
||||
- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk)
|
||||
- **youtube**
|
||||
- [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142)
|
||||
- [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan)
|
||||
- IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively
|
||||
- IOS also has higher bit-rate 'premium' formats though they are not labeled as such
|
||||
- [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan)
|
||||
|
@ -27,7 +306,7 @@
|
|||
- [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan)
|
||||
|
||||
#### Misc. changes
|
||||
- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700)
|
||||
- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan)
|
||||
- **cleanup**
|
||||
- Miscellaneous
|
||||
- [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)
|
||||
|
|
|
@ -44,16 +44,6 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
|
|||
* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc
|
||||
|
||||
|
||||
## [Lesmiscore](https://github.com/Lesmiscore)
|
||||
|
||||
**Bitcoin**: bc1qfd02r007cutfdjwjmyy9w23rjvtls6ncve7r3s
|
||||
**Monacoin**: mona1q3tf7dzvshrhfe3md379xtvt2n22duhglv5dskr
|
||||
|
||||
* Download live from start to end for YouTube
|
||||
* Added support for new websites AbemaTV, mildom, PixivSketch, skeb, radiko, voicy, mirrativ, openrec, whowatch, damtomo, 17.live, mixch etc
|
||||
* Improved/fixed support for fc2, YahooJapanNews, tver, iwara etc
|
||||
|
||||
|
||||
## [bashonly](https://github.com/bashonly)
|
||||
|
||||
* `--update-to`, automated release, nightly builds
|
||||
|
|
2
Makefile
2
Makefile
|
@ -74,7 +74,7 @@ offlinetest: codetest
|
|||
$(PYTHON) -m pytest -k "not download"
|
||||
|
||||
# XXX: This is hard to maintain
|
||||
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies
|
||||
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking
|
||||
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
|
||||
mkdir -p zip
|
||||
for d in $(CODE_FOLDERS) ; do \
|
||||
|
|
33
README.md
33
README.md
|
@ -12,7 +12,7 @@
|
|||
[](LICENSE "License")
|
||||
[](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
|
||||
[](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
|
||||
[](https://github.com/yt-dlp/yt-dlp/pulse/monthly "Last activity")
|
||||
|
||||
</div>
|
||||
<!-- MANPAGE: END EXCLUDED SECTION -->
|
||||
|
@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
|
|||
|
||||
# NEW FEATURES
|
||||
|
||||
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/yt-dlp/yt-dlp/commit/42f2d4) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
|
||||
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
|
||||
|
||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||
|
||||
|
@ -1323,7 +1323,7 @@ The available fields are:
|
|||
- `extractor` (string): Name of the extractor
|
||||
- `extractor_key` (string): Key name of the extractor
|
||||
- `epoch` (numeric): Unix epoch of when the information extraction was completed
|
||||
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
|
||||
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits
|
||||
- `video_autonumber` (numeric): Number that will be increased with each video
|
||||
- `n_entries` (numeric): Total number of extracted items in the playlist
|
||||
- `playlist_id` (string): Identifier of the playlist that contains the video
|
||||
|
@ -1509,7 +1509,7 @@ Unless `--video-multistreams` is used, all formats with a video stream except th
|
|||
|
||||
## Filtering Formats
|
||||
|
||||
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
|
||||
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"` since filters without a selector are interpreted as `best`).
|
||||
|
||||
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
|
||||
|
||||
|
@ -1545,7 +1545,7 @@ Any string comparison may be prefixed with negation `!` in order to produce an o
|
|||
|
||||
**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of more than 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
|
||||
Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
|
||||
|
||||
|
@ -1569,7 +1569,7 @@ The available fields are:
|
|||
- `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
|
||||
- `ext`: Equivalent to `vext,aext`
|
||||
- `filesize`: Exact filesize, if known in advance
|
||||
- `fs_approx`: Approximate filesize calculated from the manifests
|
||||
- `fs_approx`: Approximate filesize
|
||||
- `size`: Exact filesize if available, otherwise approximate filesize
|
||||
- `height`: Height of video
|
||||
- `width`: Width of video
|
||||
|
@ -1580,7 +1580,7 @@ The available fields are:
|
|||
- `tbr`: Total average bitrate in KBit/s
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `br`: Equivalent to using `tbr,vbr,abr`
|
||||
- `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
|
||||
- `asr`: Audio sample rate in Hz
|
||||
|
||||
**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.
|
||||
|
@ -1800,15 +1800,16 @@ The following extractors use this feature:
|
|||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
* `max_comments`: Limit the number of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
* `include_duplicate_formats`: Extract formats with identical content but different URLs or protocol. This is useful if some of the formats are unavailable or throttled.
|
||||
* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8)
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||
* `innertube_key`: Innertube API key to use for all API requests
|
||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||
|
||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
|
||||
|
@ -1845,6 +1846,9 @@ The following extractors use this feature:
|
|||
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
|
||||
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`
|
||||
|
||||
#### niconicochannelplus
|
||||
* `max_comments`: Maximum number of comments to extract - default is `120`
|
||||
|
||||
#### tiktok
|
||||
* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
|
||||
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
|
||||
|
@ -1854,9 +1858,9 @@ The following extractors use this feature:
|
|||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
||||
#### twitter
|
||||
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
|
||||
* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in
|
||||
|
||||
#### wrestleuniverse
|
||||
#### stacommu, wrestleuniverse
|
||||
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
|
||||
|
||||
#### twitch
|
||||
|
@ -1865,6 +1869,9 @@ The following extractors use this feature:
|
|||
#### nhkradirulive (NHK らじる★らじる LIVE)
|
||||
* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo`
|
||||
|
||||
#### nflplusreplay
|
||||
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||
|
@ -1954,7 +1961,7 @@ with YoutubeDL() as ydl:
|
|||
ydl.download(URLS)
|
||||
```
|
||||
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L184).
|
||||
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L183) or `help(yt_dlp.YoutubeDL)` in a Python shell. If you are already familiar with the CLI, you can use [`devscripts/cli_to_api.py`](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) to translate any CLI switches to `YoutubeDL` params.
|
||||
|
||||
**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be JSON serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information).
|
||||
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
[
|
||||
{
|
||||
"action": "add",
|
||||
"when": "776d1c3f0c9b00399896dd2e40e78e9a43218109",
|
||||
"when": "29cb20bd563c02671b31dd840139e93dd37150a1",
|
||||
"short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`"
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "776d1c3f0c9b00399896dd2e40e78e9a43218109",
|
||||
"when": "5038f6d713303e0967d002216e7a88652401c22a",
|
||||
"short": "[priority] **YouTube throttling fixes!**"
|
||||
},
|
||||
{
|
||||
|
@ -38,13 +38,15 @@
|
|||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "7b37e8b23691613f331bd4ebc9d639dd6f93c972",
|
||||
"short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL"
|
||||
"when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56",
|
||||
"short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL",
|
||||
"authors": ["pukkandan"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "1e75d97db21152acc764b30a688e516f04b8a142",
|
||||
"short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such"
|
||||
"short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such",
|
||||
"authors": ["pukkandan"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
|
@ -55,6 +57,46 @@
|
|||
{
|
||||
"action": "change",
|
||||
"when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700",
|
||||
"short": "[misc] Revert \"Add automatic duplicate issue detection\""
|
||||
"short": "[misc] Revert \"Add automatic duplicate issue detection\"",
|
||||
"authors": ["pukkandan"]
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "1ceb657bdd254ad961489e5060f2ccc7d556b729",
|
||||
"short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`"
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
|
||||
"short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b",
|
||||
"authors": ["pukkandan"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f",
|
||||
"short": "[test] Add tests for socks proxies (#7908)",
|
||||
"authors": ["coletdjnz"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "4bf912282a34b58b6b35d8f7e6be535770c89c76",
|
||||
"short": "[rh:urllib] Remove dot segments during URL normalization (#7662)",
|
||||
"authors": ["coletdjnz"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "59e92b1f1833440bb2190f847eb735cf0f90bc85",
|
||||
"short": "[rh:urllib] Simplify gzip decoding (#7611)",
|
||||
"authors": ["Grub4K"]
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b",
|
||||
"short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)"
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "61bdf15fc7400601c3da1aa7a43917310a5bf391",
|
||||
"short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands."
|
||||
}
|
||||
]
|
||||
|
|
|
@ -31,33 +31,27 @@ class CommitGroup(enum.Enum):
|
|||
EXTRACTOR = 'Extractor'
|
||||
DOWNLOADER = 'Downloader'
|
||||
POSTPROCESSOR = 'Postprocessor'
|
||||
NETWORKING = 'Networking'
|
||||
MISC = 'Misc.'
|
||||
|
||||
@classmethod
|
||||
@property
|
||||
def ignorable_prefixes(cls):
|
||||
return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream')
|
||||
|
||||
@classmethod
|
||||
@lru_cache
|
||||
def commit_lookup(cls):
|
||||
def subgroup_lookup(cls):
|
||||
return {
|
||||
name: group
|
||||
for group, names in {
|
||||
cls.PRIORITY: {'priority'},
|
||||
cls.CORE: {
|
||||
'aes',
|
||||
'cache',
|
||||
'compat_utils',
|
||||
'compat',
|
||||
'cookies',
|
||||
'core',
|
||||
'dependencies',
|
||||
'formats',
|
||||
'jsinterp',
|
||||
'outtmpl',
|
||||
'plugins',
|
||||
'update',
|
||||
'upstream',
|
||||
'utils',
|
||||
},
|
||||
cls.MISC: {
|
||||
|
@ -65,23 +59,40 @@ class CommitGroup(enum.Enum):
|
|||
'cleanup',
|
||||
'devscripts',
|
||||
'docs',
|
||||
'misc',
|
||||
'test',
|
||||
},
|
||||
cls.EXTRACTOR: {'extractor'},
|
||||
cls.DOWNLOADER: {'downloader'},
|
||||
cls.POSTPROCESSOR: {'postprocessor'},
|
||||
cls.NETWORKING: {
|
||||
'rh',
|
||||
},
|
||||
}.items()
|
||||
for name in names
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def get(cls, value):
|
||||
result = cls.commit_lookup().get(value)
|
||||
if result:
|
||||
logger.debug(f'Mapped {value!r} => {result.name}')
|
||||
@lru_cache
|
||||
def group_lookup(cls):
|
||||
result = {
|
||||
'fd': cls.DOWNLOADER,
|
||||
'ie': cls.EXTRACTOR,
|
||||
'pp': cls.POSTPROCESSOR,
|
||||
'upstream': cls.CORE,
|
||||
}
|
||||
result.update({item.name.lower(): item for item in iter(cls)})
|
||||
return result
|
||||
|
||||
@classmethod
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
    """Resolve a commit prefix of the form ``group`` or ``group/subgroup``.

    Both halves are stripped and lower-cased before any lookup.

    Returns a ``(group, subgroup)`` pair:
      * known group             -> ``(group, subgroup or None)``
      * unknown group + '/...'  -> ``(None, value)`` with *value* untouched
      * unknown group, no '/'   -> the text is treated as a subgroup and
        resolved through ``subgroup_lookup()``; the group may still be None.
    """
    head, _, tail = value.partition('/')
    group_key = head.strip().lower()
    subgroup = tail.strip().lower()

    matched = cls.group_lookup().get(group_key)
    if matched:
        return matched, subgroup or None

    if subgroup:
        # An explicit subgroup under an unrecognised group cannot be mapped;
        # hand the raw prefix back so the caller can decide what to do.
        return None, value

    # No '/' present: the whole prefix may itself be a known subgroup name.
    return cls.subgroup_lookup().get(group_key), group_key or None
|
||||
|
||||
|
||||
@dataclass
|
||||
class Commit:
|
||||
|
@ -196,19 +207,23 @@ class Changelog:
|
|||
for commit_infos in cleanup_misc_items.values():
|
||||
sorted_items.append(CommitInfo(
|
||||
'cleanup', ('Miscellaneous',), ', '.join(
|
||||
self._format_message_link(None, info.commit.hash).strip()
|
||||
self._format_message_link(None, info.commit.hash)
|
||||
for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
|
||||
[], Commit(None, '', commit_infos[0].commit.authors), []))
|
||||
|
||||
return sorted_items
|
||||
|
||||
def format_single_change(self, info):
|
||||
message = self._format_message_link(info.message, info.commit.hash)
|
||||
def format_single_change(self, info: CommitInfo):
|
||||
message, sep, rest = info.message.partition('\n')
|
||||
if '[' not in message:
|
||||
# If the message doesn't already contain markdown links, try to add a link to the commit
|
||||
message = self._format_message_link(message, info.commit.hash)
|
||||
|
||||
if info.issues:
|
||||
message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1)
|
||||
message = f'{message} ({self._format_issues(info.issues)})'
|
||||
|
||||
if info.commit.authors:
|
||||
message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1)
|
||||
message = f'{message} by {self._format_authors(info.commit.authors)}'
|
||||
|
||||
if info.fixes:
|
||||
fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
|
||||
|
@ -217,16 +232,14 @@ class Changelog:
|
|||
if authors != info.commit.authors:
|
||||
fix_message = f'{fix_message} by {self._format_authors(authors)}'
|
||||
|
||||
message = message.replace('\n', f' (With fixes in {fix_message})\n', 1)
|
||||
message = f'{message} (With fixes in {fix_message})'
|
||||
|
||||
return message[:-1]
|
||||
return message if not sep else f'{message}{sep}{rest}'
|
||||
|
||||
def _format_message_link(self, message, hash):
|
||||
assert message or hash, 'Improperly defined commit message or override'
|
||||
message = message if message else hash[:HASH_LENGTH]
|
||||
if not hash:
|
||||
return f'{message}\n'
|
||||
return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1)
|
||||
return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message
|
||||
|
||||
def _format_issues(self, issues):
|
||||
return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
|
||||
|
@ -247,11 +260,12 @@ class CommitRange:
|
|||
AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
|
||||
MESSAGE_RE = re.compile(r'''
|
||||
(?:\[(?P<prefix>[^\]]+)\]\ )?
|
||||
(?:(?P<sub_details>`?[^:`]+`?): )?
|
||||
(?:(?P<sub_details>`?[\w.-]+`?): )?
|
||||
(?P<message>.+?)
|
||||
(?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
|
||||
''', re.VERBOSE | re.DOTALL)
|
||||
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
|
||||
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
|
||||
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
|
||||
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
|
||||
|
||||
|
@ -279,7 +293,7 @@ class CommitRange:
|
|||
self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
|
||||
f'{self._start}..{self._end}' if self._start else self._end).stdout
|
||||
|
||||
commits = {}
|
||||
commits, reverts = {}, {}
|
||||
fixes = defaultdict(list)
|
||||
lines = iter(result.splitlines(False))
|
||||
for i, commit_hash in enumerate(lines):
|
||||
|
@ -300,6 +314,11 @@ class CommitRange:
|
|||
logger.debug(f'Reached Release commit, breaking: {commit}')
|
||||
break
|
||||
|
||||
revert_match = self.REVERT_RE.fullmatch(commit.short)
|
||||
if revert_match:
|
||||
reverts[revert_match.group(1)] = commit
|
||||
continue
|
||||
|
||||
fix_match = self.FIXES_RE.search(commit.short)
|
||||
if fix_match:
|
||||
commitish = fix_match.group(1)
|
||||
|
@ -307,6 +326,13 @@ class CommitRange:
|
|||
|
||||
commits[commit.hash] = commit
|
||||
|
||||
for commitish, revert_commit in reverts.items():
|
||||
reverted = commits.pop(commitish, None)
|
||||
if reverted:
|
||||
logger.debug(f'{commitish} fully reverted {reverted}')
|
||||
else:
|
||||
commits[revert_commit.hash] = revert_commit
|
||||
|
||||
for commitish, fix_commits in fixes.items():
|
||||
if commitish in commits:
|
||||
hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
|
||||
|
@ -322,7 +348,7 @@ class CommitRange:
|
|||
for override in overrides:
|
||||
when = override.get('when')
|
||||
if when and when not in self and when != self._start:
|
||||
logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
|
||||
logger.debug(f'Ignored {when!r} override')
|
||||
continue
|
||||
|
||||
override_hash = override.get('hash') or when
|
||||
|
@ -350,7 +376,7 @@ class CommitRange:
|
|||
for commit in self:
|
||||
upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
|
||||
if upstream_re:
|
||||
commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}'
|
||||
commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'
|
||||
|
||||
match = self.MESSAGE_RE.fullmatch(commit.short)
|
||||
if not match:
|
||||
|
@ -395,25 +421,20 @@ class CommitRange:
|
|||
if not prefix:
|
||||
return CommitGroup.CORE, None, ()
|
||||
|
||||
prefix, _, details = prefix.partition('/')
|
||||
prefix = prefix.strip()
|
||||
details = details.strip()
|
||||
prefix, *sub_details = prefix.split(':')
|
||||
|
||||
group = CommitGroup.get(prefix.lower())
|
||||
if group is CommitGroup.PRIORITY:
|
||||
prefix, _, details = details.partition('/')
|
||||
group, details = CommitGroup.get(prefix)
|
||||
if group is CommitGroup.PRIORITY and details:
|
||||
details = details.partition('/')[2].strip()
|
||||
|
||||
if not details and prefix and prefix not in CommitGroup.ignorable_prefixes:
|
||||
logger.debug(f'Replaced details with {prefix!r}')
|
||||
details = prefix or None
|
||||
if details and '/' in details:
|
||||
logger.error(f'Prefix is overnested, using first part: {prefix}')
|
||||
details = details.partition('/')[0].strip()
|
||||
|
||||
if details == 'common':
|
||||
details = None
|
||||
|
||||
if details:
|
||||
details, *sub_details = details.split(':')
|
||||
else:
|
||||
sub_details = []
|
||||
elif group is CommitGroup.NETWORKING and details == 'rh':
|
||||
details = 'Request Handler'
|
||||
|
||||
return group, details, sub_details
|
||||
|
||||
|
|
|
@ -10,14 +10,14 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||
import argparse
|
||||
import contextlib
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from devscripts.utils import read_version, run_process, write_file
|
||||
|
||||
|
||||
def get_new_version(version, revision):
|
||||
if not version:
|
||||
version = datetime.utcnow().strftime('%Y.%m.%d')
|
||||
version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
|
||||
|
||||
if revision:
|
||||
assert revision.isdigit(), 'Revision must be a number'
|
||||
|
|
|
@ -8,7 +8,6 @@ ignore = E402,E501,E731,E741,W503
|
|||
max_line_length = 120
|
||||
per_file_ignores =
|
||||
devscripts/lazy_load_template.py: F401
|
||||
yt_dlp/utils/__init__.py: F401, F403
|
||||
|
||||
|
||||
[autoflake]
|
||||
|
|
3
setup.py
3
setup.py
|
@ -65,7 +65,8 @@ def py2exe_params():
|
|||
'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto
|
||||
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
|
||||
# Modules that are only imported dynamically must be added here
|
||||
'includes': ['yt_dlp.compat._legacy'],
|
||||
'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
|
||||
'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
|
||||
},
|
||||
'zipfile': None,
|
||||
}
|
||||
|
|
|
@ -77,7 +77,7 @@
|
|||
- **AnimalPlanet**
|
||||
- **ant1newsgr:article**: ant1news.gr articles
|
||||
- **ant1newsgr:embed**: ant1news.gr embedded videos
|
||||
- **ant1newsgr:watch**: ant1news.gr videos
|
||||
- **antenna:watch**: antenna.gr and ant1news.gr videos
|
||||
- **Anvato**
|
||||
- **aol.com**: Yahoo screen and movies
|
||||
- **APA**
|
||||
|
@ -98,8 +98,6 @@
|
|||
- **ArteTVCategory**
|
||||
- **ArteTVEmbed**
|
||||
- **ArteTVPlaylist**
|
||||
- **AsianCrush**
|
||||
- **AsianCrushPlaylist**
|
||||
- **AtresPlayer**: [*atresplayer*](## "netrc machine")
|
||||
- **AtScaleConfEvent**
|
||||
- **ATTTechChannel**
|
||||
|
@ -118,6 +116,7 @@
|
|||
- **awaan:live**
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **axs.tv**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **BanBye**
|
||||
|
@ -162,11 +161,16 @@
|
|||
- **BilibiliAudioAlbum**
|
||||
- **BiliBiliBangumi**
|
||||
- **BiliBiliBangumiMedia**
|
||||
- **BiliBiliBangumiSeason**
|
||||
- **BilibiliCollectionList**
|
||||
- **BilibiliFavoritesList**
|
||||
- **BiliBiliPlayer**
|
||||
- **BilibiliPlaylist**
|
||||
- **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
|
||||
- **BilibiliSeriesList**
|
||||
- **BilibiliSpaceAudio**
|
||||
- **BilibiliSpacePlaylist**
|
||||
- **BilibiliSpaceVideo**
|
||||
- **BilibiliWatchlater**
|
||||
- **BiliIntl**: [*biliintl*](## "netrc machine")
|
||||
- **biliIntl:series**: [*biliintl*](## "netrc machine")
|
||||
- **BiliLive**
|
||||
|
@ -201,6 +205,8 @@
|
|||
- **BreitBart**
|
||||
- **brightcove:legacy**
|
||||
- **brightcove:new**
|
||||
- **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org
|
||||
- **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
|
||||
- **BRMediathek**: Bayerischer Rundfunk Mediathek
|
||||
- **bt:article**: Bergens Tidende Articles
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
|
@ -220,14 +226,17 @@
|
|||
- **Camsoda**
|
||||
- **CamtasiaEmbed**
|
||||
- **CamWithHer**
|
||||
- **Canal1**
|
||||
- **CanalAlpha**
|
||||
- **canalc2.tv**
|
||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||
- **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
|
||||
- **CarambaTV**
|
||||
- **CarambaTVPage**
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:player:playlist**
|
||||
- **CBS**
|
||||
- **CBSInteractive**
|
||||
- **CBSLocal**
|
||||
|
@ -257,6 +266,8 @@
|
|||
- **Cinchcast**
|
||||
- **Cinemax**
|
||||
- **CinetecaMilano**
|
||||
- **Cineverse**
|
||||
- **CineverseDetails**
|
||||
- **CiscoLiveSearch**
|
||||
- **CiscoLiveSession**
|
||||
- **ciscowebex**: Cisco Webex
|
||||
|
@ -365,7 +376,7 @@
|
|||
- **Dotsub**
|
||||
- **Douyin**
|
||||
- **DouyuShow**
|
||||
- **DouyuTV**: 斗鱼
|
||||
- **DouyuTV**: 斗鱼直播
|
||||
- **DPlay**
|
||||
- **DRBonanza**
|
||||
- **Drooble**
|
||||
|
@ -408,8 +419,10 @@
|
|||
- **Engadget**
|
||||
- **Epicon**
|
||||
- **EpiconSeries**
|
||||
- **eplus:inbound**: e+ (イープラス) overseas
|
||||
- **Epoch**
|
||||
- **Eporner**
|
||||
- **Erocast**
|
||||
- **EroProfile**: [*eroprofile*](## "netrc machine")
|
||||
- **EroProfile:album**
|
||||
- **ertflix**: ERTFLIX videos
|
||||
|
@ -687,6 +700,7 @@
|
|||
- **LastFMUser**
|
||||
- **lbry**
|
||||
- **lbry:channel**
|
||||
- **lbry:playlist**
|
||||
- **LCI**
|
||||
- **Lcp**
|
||||
- **LcpPlay**
|
||||
|
@ -732,6 +746,7 @@
|
|||
- **lynda**: [*lynda*](## "netrc machine") lynda.com videos
|
||||
- **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses
|
||||
- **m6**
|
||||
- **MagellanTV**
|
||||
- **MagentaMusik360**
|
||||
- **mailru**: Видео@Mail.Ru
|
||||
- **mailru:music**: Музыка@Mail.Ru
|
||||
|
@ -812,6 +827,7 @@
|
|||
- **Mofosex**
|
||||
- **MofosexEmbed**
|
||||
- **Mojvideo**
|
||||
- **Monstercat**
|
||||
- **MonsterSirenHypergryphMusic**
|
||||
- **Morningstar**: morningstar.com
|
||||
- **Motherless**
|
||||
|
@ -840,6 +856,7 @@
|
|||
- **MujRozhlas**
|
||||
- **Murrtube**
|
||||
- **MurrtubeUser**: Murrtube user profile
|
||||
- **MuseAI**
|
||||
- **MuseScore**
|
||||
- **MusicdexAlbum**
|
||||
- **MusicdexArtist**
|
||||
|
@ -944,6 +961,9 @@
|
|||
- **niconico:playlist**
|
||||
- **niconico:series**
|
||||
- **niconico:tag**: NicoNico video tag URLs
|
||||
- **NiconicoChannelPlus**: ニコニコチャンネルプラス
|
||||
- **NiconicoChannelPlus:channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives
|
||||
- **NiconicoChannelPlus:channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos
|
||||
- **NiconicoUser**
|
||||
- **nicovideo:search**: Nico video search; "nicosearch:" prefix
|
||||
- **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
|
||||
|
@ -1046,6 +1066,7 @@
|
|||
- **Patreon**
|
||||
- **PatreonCampaign**
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
- **PBSKids**
|
||||
- **PearVideo**
|
||||
- **PeekVids**
|
||||
- **peer.tv**
|
||||
|
@ -1062,6 +1083,7 @@
|
|||
- **phoenix.de**
|
||||
- **Photobucket**
|
||||
- **Piapro**: [*piapro*](## "netrc machine")
|
||||
- **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
|
||||
- **Picarto**
|
||||
- **PicartoVod**
|
||||
- **Piksel**
|
||||
|
@ -1105,6 +1127,7 @@
|
|||
- **polskieradio:podcast:list**
|
||||
- **Popcorntimes**
|
||||
- **PopcornTV**
|
||||
- **Pornbox**
|
||||
- **PornCom**
|
||||
- **PornerBros**
|
||||
- **Pornez**
|
||||
|
@ -1121,7 +1144,6 @@
|
|||
- **PornTop**
|
||||
- **PornTube**
|
||||
- **Pr0gramm**
|
||||
- **Pr0grammStatic**
|
||||
- **PrankCast**
|
||||
- **PremiershipRugby**
|
||||
- **PressTV**
|
||||
|
@ -1136,6 +1158,7 @@
|
|||
- **puhutv:serie**
|
||||
- **Puls4**
|
||||
- **Pyvideo**
|
||||
- **QDance**: [*qdance*](## "netrc machine")
|
||||
- **QingTing**
|
||||
- **qqmusic**: QQ音乐
|
||||
- **qqmusic:album**: QQ音乐 - 专辑
|
||||
|
@ -1155,6 +1178,10 @@
|
|||
- **radiocanada**
|
||||
- **radiocanada:audiovideo**
|
||||
- **radiofrance**
|
||||
- **RadioFranceLive**
|
||||
- **RadioFrancePodcast**
|
||||
- **RadioFranceProfile**
|
||||
- **RadioFranceProgramSchedule**
|
||||
- **RadioJavan**
|
||||
- **radiokapital**
|
||||
- **radiokapital:show**
|
||||
|
@ -1176,6 +1203,7 @@
|
|||
- **RayWenderlichCourse**
|
||||
- **RbgTum**
|
||||
- **RbgTumCourse**
|
||||
- **RbgTumNewCourse**
|
||||
- **RBMARadio**
|
||||
- **RCS**
|
||||
- **RCSEmbeds**
|
||||
|
@ -1258,6 +1286,8 @@
|
|||
- **Ruutu**
|
||||
- **Ruv**
|
||||
- **ruv.is:spila**
|
||||
- **S4C**
|
||||
- **S4CSeries**
|
||||
- **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video
|
||||
- **safari:api**: [*safari*](## "netrc machine")
|
||||
- **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses
|
||||
|
@ -1324,6 +1354,7 @@
|
|||
- **Smotrim**
|
||||
- **Snotr**
|
||||
- **Sohu**
|
||||
- **SohuV**
|
||||
- **SonyLIV**: [*sonyliv*](## "netrc machine")
|
||||
- **SonyLIVSeries**
|
||||
- **soundcloud**: [*soundcloud*](## "netrc machine")
|
||||
|
@ -1363,6 +1394,8 @@
|
|||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **StacommuLive**: [*stacommu*](## "netrc machine")
|
||||
- **StacommuVOD**: [*stacommu*](## "netrc machine")
|
||||
- **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **StarTrek**
|
||||
|
@ -1375,7 +1408,6 @@
|
|||
- **StoryFireSeries**
|
||||
- **StoryFireUser**
|
||||
- **Streamable**
|
||||
- **Streamanity**
|
||||
- **streamcloud.eu**
|
||||
- **StreamCZ**
|
||||
- **StreamFF**
|
||||
|
@ -1400,6 +1432,9 @@
|
|||
- **Tagesschau**
|
||||
- **Tass**
|
||||
- **TBS**
|
||||
- **TBSJPEpisode**
|
||||
- **TBSJPPlaylist**
|
||||
- **TBSJPProgram**
|
||||
- **TDSLifeway**
|
||||
- **Teachable**: [*teachable*](## "netrc machine")
|
||||
- **TeachableCourse**: [*teachable*](## "netrc machine")
|
||||
|
@ -1441,8 +1476,6 @@
|
|||
- **ThePlatformFeed**
|
||||
- **TheStar**
|
||||
- **TheSun**
|
||||
- **ThetaStream**
|
||||
- **ThetaVideo**
|
||||
- **TheWeatherChannel**
|
||||
- **ThisAmericanLife**
|
||||
- **ThisAV**
|
||||
|
@ -1647,6 +1680,8 @@
|
|||
- **vk**: [*vk*](## "netrc machine") VK
|
||||
- **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos
|
||||
- **vk:wallpost**: [*vk*](## "netrc machine")
|
||||
- **VKPlay**
|
||||
- **VKPlayLive**
|
||||
- **vm.tiktok**
|
||||
- **Vocaroo**
|
||||
- **Vodlocker**
|
||||
|
@ -1697,7 +1732,6 @@
|
|||
- **wdr:mobile**: (**Currently broken**)
|
||||
- **WDRElefant**
|
||||
- **WDRPage**
|
||||
- **web.archive:vlive**: web.archive.org saved vlive videos
|
||||
- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
|
||||
- **Webcamerapl**
|
||||
- **Webcaster**
|
||||
|
@ -1705,7 +1739,8 @@
|
|||
- **WebOfStories**
|
||||
- **WebOfStoriesPlaylist**
|
||||
- **Weibo**
|
||||
- **WeiboMobile**
|
||||
- **WeiboUser**
|
||||
- **WeiboVideo**
|
||||
- **WeiqiTV**: WQTV
|
||||
- **wetv:episode**
|
||||
- **WeTvSeries**
|
||||
|
@ -1721,6 +1756,7 @@
|
|||
- **Whyp**
|
||||
- **wikimedia.org**
|
||||
- **Willow**
|
||||
- **Wimbledon**
|
||||
- **WimTV**
|
||||
- **WinSportsVideo**
|
||||
- **Wistia**
|
||||
|
@ -1800,7 +1836,6 @@
|
|||
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
|
||||
- **youtube:search_url**: YouTube search URLs with sorting and filter support
|
||||
- **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
|
||||
- **youtube:stories**: YouTube channel stories; "ytstories:" prefix
|
||||
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
|
||||
- **youtube:tab**: YouTube Tabs
|
||||
- **youtube:user**: YouTube user videos; "ytuser:" prefix
|
||||
|
|
21
test/conftest.py
Normal file
21
test/conftest.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
import functools
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.networking import RequestHandler
|
||||
from yt_dlp.networking.common import _REQUEST_HANDLERS
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
|
||||
|
||||
|
||||
@pytest.fixture
def handler(request):
    """Fixture yielding a request-handler factory for the current parameter.

    ``request.param`` may be either a ``RequestHandler`` subclass, used
    as-is, or a key of ``_REQUEST_HANDLERS``. If it is neither, the test
    is skipped. The returned callable is the handler with
    ``logger=FakeLogger`` already bound.
    """
    key = request.param
    if inspect.isclass(key) and issubclass(key, RequestHandler):
        handler_cls = key
    else:
        if key not in _REQUEST_HANDLERS:
            # pytest.skip raises, so nothing below runs for unknown keys
            pytest.skip(f'{key} request handler is not available')
        handler_cls = _REQUEST_HANDLERS[key]

    return functools.partial(handler_cls, logger=FakeLogger)
|
|
@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||
import copy
|
||||
import json
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches
|
||||
from test.helper import FakeYDL, assertRegexpMatches, try_rm
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_os_name
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
|
@ -24,6 +24,7 @@ from yt_dlp.utils import (
|
|||
int_or_none,
|
||||
match_filter_func,
|
||||
)
|
||||
from yt_dlp.utils.traversal import traverse_obj
|
||||
|
||||
TEST_URL = 'http://localhost/sample.mp4'
|
||||
|
||||
|
@ -630,7 +631,6 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
self.assertEqual(test_dict['playlist'], 'funny videos')
|
||||
|
||||
outtmpl_info = {
|
||||
'id': '1234',
|
||||
'id': '1234',
|
||||
'ext': 'mp4',
|
||||
'width': None,
|
||||
|
@ -684,7 +684,8 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
test('%(id)s.%(ext)s', '1234.mp4')
|
||||
test('%(duration_string)s', ('27:46:40', '27-46-40'))
|
||||
test('%(resolution)s', '1080p')
|
||||
test('%(playlist_index)s', '001')
|
||||
test('%(playlist_index|)s', '001')
|
||||
test('%(playlist_index&{}!)s', '1!')
|
||||
test('%(playlist_autonumber)s', '02')
|
||||
test('%(autonumber)s', '00001')
|
||||
test('%(autonumber+2)03d', '005', autonumber_start=3)
|
||||
|
@ -783,9 +784,9 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
test('%(title4)#S', 'foo_bar_test')
|
||||
test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' ')))
|
||||
if compat_os_name == 'nt':
|
||||
test('%(title4)q', ('"foo \\"bar\\" test"', ""foo ⧹"bar⧹" test""))
|
||||
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '"id 1" "id 2" "id 3"'))
|
||||
test('%(formats.0.id)#q', ('"id 1"', '"id 1"'))
|
||||
test('%(title4)q', ('"foo ""bar"" test"', None))
|
||||
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
|
||||
test('%(formats.0.id)#q', ('"id 1"', None))
|
||||
else:
|
||||
test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\''))
|
||||
test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'")
|
||||
|
@ -829,6 +830,7 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
test('%(id&hi {:>10} {}|)s', 'hi 1234 1234')
|
||||
test(R'%(id&{0} {}|)s', 'NA')
|
||||
test(R'%(id&{0.1}|)s', 'NA')
|
||||
test('%(height&{:,d})S', '1,080')
|
||||
|
||||
# Laziness
|
||||
def gen():
|
||||
|
@ -1213,6 +1215,129 @@ class TestYoutubeDL(unittest.TestCase):
|
|||
self.assertEqual(downloaded['extractor'], 'Video')
|
||||
self.assertEqual(downloaded['extractor_key'], 'Video')
|
||||
|
||||
def test_header_cookies(self):
    # Exercises cookie loading from an encoded cookie string, both as an
    # explicit cookies value and as a `Cookie` header scoped to the input URL.
    from http.cookiejar import Cookie

    ydl = FakeYDL()
    ydl.report_warning = lambda *_, **__: None

    def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
        # Build the Cookie object the jar is expected to contain.
        # The domain/path "specified" flags follow whether a value was given.
        return Cookie(
            version=version or 0, name=name, value=value,
            port=None, port_specified=False,
            domain=domain, domain_specified=bool(domain), domain_initial_dot=bool(domain),
            path=path, path_specified=bool(path),
            secure=secure, expires=expires, discard=False,
            comment=None, comment_url=None, rest={})

    _test_url = 'https://yt.dlp/test'

    def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None):
        def run_case():
            ydl.cookiejar.clear()
            ydl._load_cookies(encoded_cookies, autoscope=headers)
            if headers:
                ydl._apply_header_cookies(_test_url)
            info = {'url': _test_url}
            ydl._calc_headers(info)
            self.assertCountEqual(
                map(vars, ydl.cookiejar), map(vars, cookies),
                'Extracted cookiejar.Cookie is not the same')
            if not headers:
                expected_field = round_trip or encoded_cookies
                self.assertEqual(
                    info.get('cookies'), expected_field,
                    'Cookie is not the same as round trip')
            # Reset the name-mangled private header-cookie list between cases
            ydl.__dict__['_YoutubeDL__header_cookies'] = []

        with self.subTest(msg=encoded_cookies):
            if error_re:
                with self.assertRaisesRegex(Exception, error_re):
                    run_case()
            else:
                run_case()

    test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
    test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed')
    test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [
        cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
        cookie('cookie2', 'value2', domain='.yt.dlp', path='/')])
    test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [
        cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
    test('test="value; "; path=/test; domain=.yt.dlp', [
        cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
        round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
    test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')],
         round_trip='name=""; Domain=.yt.dlp')

    # Header cookies: unscoped values are allowed and get scoped to the test URL
    test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
    test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax')
    # Turn the deprecation notice into an error so it can be asserted on
    ydl.deprecated_feature = ydl.report_error
    test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk')
|
||||
|
||||
def test_infojson_cookies(self):
    # Checks that cookies survive a round trip through a written info.json:
    # `Cookie` headers must be stripped from the info dict / formats and be
    # replaced by a `cookies` field, and the cookiejar must be populated both
    # on the initial processing and when the info.json is loaded again.
    TEST_FILE = 'test_infojson_cookies.info.json'
    TEST_URL = 'https://example.com/example.mp4'
    COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
    COOKIE_HEADER = {'Cookie': 'a=b; c=d'}

    # Registered up front so the file is removed even when an assertion fails
    self.addCleanup(try_rm, TEST_FILE)

    ydl = FakeYDL()
    # Instead of downloading, dump the processed info dict to TEST_FILE
    ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)

    def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
        # Build a single-format result carrying cookies in the requested places
        fmt = {'url': TEST_URL}
        if fmts_header_cookies:
            fmt['http_headers'] = COOKIE_HEADER
        if cookies_field:
            fmt['cookies'] = COOKIES
        return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)

    def test(initial_info, note):
        result = {}
        result['processed'] = ydl.process_ie_result(initial_info)
        self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
                        msg=f'No cookies set in cookiejar after initial process when {note}')
        ydl.cookiejar.clear()
        with open(TEST_FILE) as infojson:
            result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
        result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
        self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
                        msg=f'No cookies set in cookiejar after final process when {note}')
        ydl.cookiejar.clear()
        for key in ('processed', 'loaded', 'final'):
            info = result[key]
            # Look at the top-level dict first, then at the first format
            self.assertIsNone(
                traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
                msg=f'Cookie header not removed in {key} result when {note}')
            self.assertEqual(
                traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
                msg=f'No cookies field found in {key} result when {note}')

    test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
    test(make_info(info_header_cookies=True), 'info_dict header cookies')
    test(make_info(fmts_header_cookies=True), 'format header cookies')
    test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
    test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
    test(make_info(cookies_field=True), 'cookies format field')
    test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
|
||||
|
||||
def test_add_headers_cookie(self):
    # A `Cookie` passed through the `http_headers` param must be scoped to the
    # input URL's domain: exposed as a `cookies` field for matching formats
    # and never sent to a different domain.
    cookie_header_path = ((None, ('formats', 0)), 'http_headers', 'Cookie')

    def check_for_cookie_header(result):
        return traverse_obj(result, cookie_header_path, casesense=False, get_all=False)

    ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
    ydl._apply_header_cookies(_make_result([])['webpage_url'])  # Scope to input webpage URL: .example.com

    # Format on the scoped domain: the cookie is applied
    fmt = {'url': 'https://example.com/video.mp4'}
    result = ydl.process_ie_result(_make_result([fmt]), download=False)
    self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
    self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
    jar_header = ydl.cookiejar.get_cookie_header(fmt['url'])
    self.assertIn('a=b', jar_header, msg='No cookies were set in cookiejar')

    # Format on an unrelated domain: nothing may leak
    fmt = {'url': 'https://wrong.com/video.mp4'}
    result = ydl.process_ie_result(_make_result([fmt]), download=False)
    self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
    self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
    jar_header = ydl.cookiejar.get_cookie_header(fmt['url'])
    self.assertFalse(jar_header, msg='Cookies set in cookiejar for wrong domain')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -17,10 +17,10 @@ from yt_dlp.cookies import YoutubeDLCookieJar
|
|||
class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||
def test_keep_session_cookies(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
cookiejar.load()
|
||||
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||
try:
|
||||
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
|
||||
cookiejar.save(filename=tf.name)
|
||||
temp = tf.read().decode()
|
||||
self.assertTrue(re.search(
|
||||
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
|
||||
|
@ -32,7 +32,7 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||
|
||||
def test_strip_httponly_prefix(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
cookiejar.load()
|
||||
|
||||
def assert_cookie_has_value(key):
|
||||
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
|
||||
|
@ -42,17 +42,25 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||
|
||||
def test_malformed_cookies(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
cookiejar.load()
|
||||
# Cookies should be empty since all malformed cookie file entries
|
||||
# will be ignored
|
||||
self.assertFalse(cookiejar._cookies)
|
||||
|
||||
def test_get_cookie_header(self):
|
||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||
cookiejar.load()
|
||||
header = cookiejar.get_cookie_header('https://www.foobar.foobar')
|
||||
self.assertIn('HTTPONLY_COOKIE', header)
|
||||
|
||||
def test_get_cookies_for_url(self):
    # Cookies stored for www.foobar.foobar are returned for that host only,
    # not for the bare parent domain.
    jar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
    jar.load()

    matching = jar.get_cookies_for_url('https://www.foobar.foobar/')
    self.assertEqual(len(matching), 2)

    non_matching = jar.get_cookies_for_url('https://foobar.foobar/')
    self.assertFalse(non_matching)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -9,15 +9,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||
|
||||
|
||||
import struct
|
||||
import urllib.parse
|
||||
|
||||
from yt_dlp import compat
|
||||
from yt_dlp.compat import urllib # isort: split
|
||||
from yt_dlp.compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from yt_dlp.compat.urllib.request import getproxies
|
||||
|
||||
|
||||
class TestCompat(unittest.TestCase):
|
||||
|
@ -28,8 +29,7 @@ class TestCompat(unittest.TestCase):
|
|||
with self.assertWarns(DeprecationWarning):
|
||||
compat.WINDOWS_VT_MODE
|
||||
|
||||
# TODO: Test submodule
|
||||
# compat.asyncio.events # Must not raise error
|
||||
self.assertEqual(urllib.request.getproxies, getproxies)
|
||||
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
compat.compat_pycrypto_AES # Must not raise error
|
||||
|
|
|
@ -10,10 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||
|
||||
import collections
|
||||
import hashlib
|
||||
import http.client
|
||||
import json
|
||||
import socket
|
||||
import urllib.error
|
||||
|
||||
from test.helper import (
|
||||
assertGreaterEqual,
|
||||
|
@ -29,10 +26,12 @@ from test.helper import (
|
|||
|
||||
import yt_dlp.YoutubeDL # isort: split
|
||||
from yt_dlp.extractor import get_info_extractor
|
||||
from yt_dlp.networking.exceptions import HTTPError, TransportError
|
||||
from yt_dlp.utils import (
|
||||
DownloadError,
|
||||
ExtractorError,
|
||||
UnavailableVideoError,
|
||||
YoutubeDLError,
|
||||
format_bytes,
|
||||
join_nonempty,
|
||||
)
|
||||
|
@ -102,6 +101,8 @@ def generator(test_case, tname):
|
|||
print_skipping('IE marked as not _WORKING')
|
||||
|
||||
for tc in test_cases:
|
||||
if tc.get('expected_exception'):
|
||||
continue
|
||||
info_dict = tc.get('info_dict', {})
|
||||
params = tc.get('params', {})
|
||||
if not info_dict.get('id'):
|
||||
|
@ -141,6 +142,17 @@ def generator(test_case, tname):
|
|||
|
||||
res_dict = None
|
||||
|
||||
def match_exception(err):
    """Return True if *err* is the exception this test case expects.

    The test case names the expected exception class in its
    'expected_exception' entry. *err* matches when its own class has that
    name, or when any element of its ``exc_info`` is an instance of a class
    with that name.
    """
    expected_exception = test_case.get('expected_exception')
    if not expected_exception:
        return False
    if err.__class__.__name__ == expected_exception:
        return True
    # This helper is called for every caught error, so `exc_info` cannot be
    # relied on: treat a missing or None `exc_info` as "no wrapped exception"
    # instead of failing with AttributeError/TypeError.
    wrapped = getattr(err, 'exc_info', None) or ()
    return any(exc.__class__.__name__ == expected_exception for exc in wrapped)
|
||||
|
||||
def try_rm_tcs_files(tcs=None):
|
||||
if tcs is None:
|
||||
tcs = test_cases
|
||||
|
@ -162,8 +174,9 @@ def generator(test_case, tname):
|
|||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine)
|
||||
or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
|
||||
if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
|
||||
if match_exception(err):
|
||||
return
|
||||
err.msg = f'{getattr(err, "msg", err)} ({tname})'
|
||||
raise
|
||||
|
||||
|
@ -174,6 +187,10 @@ def generator(test_case, tname):
|
|||
print(f'Retrying: {try_num} failed tries\n\n##########\n\n')
|
||||
|
||||
try_num += 1
|
||||
except YoutubeDLError as err:
|
||||
if match_exception(err):
|
||||
return
|
||||
raise
|
||||
else:
|
||||
break
|
||||
|
||||
|
@ -249,7 +266,7 @@ def generator(test_case, tname):
|
|||
# extractor returns full results even with extract_flat
|
||||
res_tcs = [{'info_dict': e} for e in res_dict['entries']]
|
||||
try_rm_tcs_files(res_tcs)
|
||||
|
||||
ydl.close()
|
||||
return test_template
|
||||
|
||||
|
||||
|
|
139
test/test_downloader_external.py
Normal file
139
test/test_downloader_external.py
Normal file
|
@ -0,0 +1,139 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import http.cookiejar
|
||||
|
||||
from test.helper import FakeYDL
|
||||
from yt_dlp.downloader.external import (
|
||||
Aria2cFD,
|
||||
AxelFD,
|
||||
CurlFD,
|
||||
FFmpegFD,
|
||||
HttpieFD,
|
||||
WgetFD,
|
||||
)
|
||||
|
||||
# Keyword arguments for `http.cookiejar.Cookie`; the tests below unpack this
# with `Cookie(**TEST_COOKIE)` to seed the cookiejar.
# NOTE(review): `domain_initial_dot` is False although the domain starts with
# a dot - kept as-is, confirm whether that is intentional.
TEST_COOKIE = dict(
    version=0,
    name='test',
    value='ytdlp',
    port=None,
    port_specified=False,
    domain='.example.com',
    domain_specified=True,
    domain_initial_dot=False,
    path='/',
    path_specified=True,
    secure=False,
    expires=None,
    discard=False,
    comment=None,
    comment_url=None,
    rest={},
)

# Minimal info dict handed to every downloader under test
TEST_INFO = dict(url='http://www.example.com/')
|
||||
|
||||
|
||||
class TestHttpieFD(unittest.TestCase):
    # Command-line construction for the httpie external downloader

    def test_make_cmd(self):
        plain_cmd = ['http', '--download', '--output', 'test', 'http://www.example.com/']
        with FakeYDL() as ydl:
            fd = HttpieFD(ydl, {})
            self.assertEqual(fd._make_cmd('test', TEST_INFO), plain_cmd)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                plain_cmd + ['Cookie:test=ytdlp'])
|
||||
|
||||
|
||||
class TestAxelFD(unittest.TestCase):
    # Command-line construction for the axel external downloader

    def test_make_cmd(self):
        url_args = ['--', 'http://www.example.com/']
        with FakeYDL() as ydl:
            fd = AxelFD(ydl, {})
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test'] + url_args)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                fd._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', '-H', 'Cookie: test=ytdlp', '--max-redirect=0'] + url_args)
|
||||
|
||||
|
||||
class TestWgetFD(unittest.TestCase):
    # Command-line construction for the wget external downloader

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = WgetFD(ydl, {})
            cmd_without_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn('--load-cookies', cmd_without_cookies)

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cmd_with_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertIn('--load-cookies', cmd_with_cookies)
|
||||
|
||||
|
||||
class TestCurlFD(unittest.TestCase):
    # Command-line construction for the curl external downloader

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = CurlFD(ydl, {})
            cmd_without_cookies = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn('--cookie', cmd_without_cookies)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            # The command is rebuilt for each assertion, as before
            for expected_arg in ('--cookie', 'test=ytdlp'):
                self.assertIn(expected_arg, fd._make_cmd('test', TEST_INFO))
|
||||
|
||||
|
||||
class TestAria2cFD(unittest.TestCase):
    # Command-line construction for the aria2c external downloader

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = Aria2cFD(ydl, {})
            fd._make_cmd('test', TEST_INFO)
            # Without cookies no temporary cookie file attribute is set
            self.assertFalse(hasattr(fd, '_cookies_tempfile'))

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cmd_with_cookies = fd._make_cmd('test', TEST_INFO)
            expected_arg = f'--load-cookies={fd._cookies_tempfile}'
            self.assertIn(expected_arg, cmd_with_cookies)
|
||||
|
||||
|
||||
@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
class TestFFmpegFD(unittest.TestCase):
    # Command-line construction for the ffmpeg downloader. The command is
    # captured through the downloader's `_debug_cmd` hook.
    _args = []

    def _test_cmd(self, args):
        # Stand-in for `_debug_cmd`: remember the last command it was given
        self._args = args

    def test_make_cmd(self):
        output_args = ['-c', 'copy', '-f', 'mp4', 'file:test']
        with FakeYDL() as ydl:
            fd = FFmpegFD(ydl, {})
            fd._debug_cmd = self._test_cmd

            fd._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(
                self._args,
                ['ffmpeg', '-y', '-hide_banner', '-i', 'http://www.example.com/'] + output_args)

            # Test cookies arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            fd._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(
                self._args,
                ['ffmpeg', '-y', '-hide_banner', '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
                 '-i', 'http://www.example.com/'] + output_args)

            # Test with non-url input (ffmpeg reads from stdin '-' for websockets)
            fd._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
            self.assertEqual(
                self._args,
                ['ffmpeg', '-y', '-hide_banner', '-i', 'x'] + output_args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
|
@ -16,6 +16,7 @@ from test.helper import http_server_port, try_rm
|
|||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.downloader.http import HttpFD
|
||||
from yt_dlp.utils import encodeFilename
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
@ -67,17 +68,6 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
|||
assert False
|
||||
|
||||
|
||||
class FakeLogger:
    """Logger stand-in whose methods accept a message and discard it."""

    def debug(self, msg):
        """Ignore a debug message."""

    def warning(self, msg):
        """Ignore a warning message."""

    def error(self, msg):
        """Ignore an error message."""
|
||||
|
||||
|
||||
class TestHttpFD(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.httpd = http.server.HTTPServer(
|
||||
|
|
|
@ -45,6 +45,9 @@ class TestExecution(unittest.TestCase):
|
|||
self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
|
||||
|
||||
_, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
|
||||
# `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated python versions
|
||||
if stderr and stderr.startswith('Deprecated Feature: Support for Python'):
|
||||
stderr = ''
|
||||
self.assertFalse(stderr)
|
||||
|
||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
|
||||
|
|
|
@ -1,500 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import gzip
|
||||
import http.cookiejar
|
||||
import http.server
|
||||
import io
|
||||
import pathlib
|
||||
import ssl
|
||||
import tempfile
|
||||
import threading
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import zlib
|
||||
|
||||
from test.helper import http_server_port
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.dependencies import brotli
|
||||
from yt_dlp.utils import sanitized_Request, urlencode_postdata
|
||||
|
||||
from .helper import FakeYDL
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler backing the HTTP/HTTPS test servers.

    Routes (matched on ``self.path``):
      * a few fixed HTML pages and ``/vid.mp4`` - canned payloads
      * ``/redirect_<code>`` - redirect to ``/method`` with that status code
      * ``/method`` - echo the request method in a ``Method`` header (and the
        request body for POST/PUT)
      * ``/headers`` - echo the received request headers as the body
      * ``/trailing_garbage`` - gzip body followed by extra bytes
      * ``/302-non-ascii-redirect`` - redirect with a non-ASCII ``Location``
      * ``/content-encoding`` - body encoded as listed in ``ytdl-encoding``
    Anything else is answered with a 404.
    """
    protocol_version = 'HTTP/1.1'

    # Body shared by every HTML test page
    _HTML_PAYLOAD = b'<html><video src="/vid.mp4" /></html>'
    _HTML_HEADERS = (('Content-Type', 'text/html; charset=utf-8'),)
    # Paths answered with the plain HTML test page
    _HTML_PATHS = ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f')

    def log_message(self, format, *args):
        # Keep test output quiet
        pass

    def _respond(self, status, payload=b'', headers=()):
        """Send a complete response: status line, *headers*, Content-Length, body.

        Content-Length is always sent (required for persistent connections).
        The body is omitted for HEAD requests so that no stray bytes are left
        on a kept-alive connection.
        """
        self.send_response(status)
        for name, value in headers:
            self.send_header(name, value)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        if payload and self.command != 'HEAD':
            self.wfile.write(payload)

    @staticmethod
    def _gzip(data):
        """Return *data* compressed as a gzip stream."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(data)
        return buf.getvalue()

    def _headers(self):
        """Echo the request headers back as the response body."""
        payload = str(self.headers).encode('utf-8')
        self._respond(200, payload, [('Content-Type', 'application/json')])

    def _redirect(self):
        """Redirect to /method using the status code embedded in the path."""
        self._respond(int(self.path[len('/redirect_'):]), headers=[('Location', '/method')])

    def _method(self, method, payload=None):
        """Report *method* in a `Method` header and echo *payload*, if any."""
        self._respond(200, payload or b'', [('Method', method)])

    def _status(self, status):
        """Answer with *status* and a small HTML body."""
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self._respond(int(status), payload, self._HTML_HEADERS)

    def _read_data(self):
        """Return the request body, or None when no Content-Length was sent."""
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))

    def _content_encoding(self):
        """Serve the HTML page encoded as requested by the `ytdl-encoding` header."""
        encodings = self.headers.get('ytdl-encoding', '')
        payload = self._HTML_PAYLOAD
        # Encodings are applied in the order listed
        for encoding in filter(None, (e.strip() for e in encodings.split(','))):
            if encoding == 'br' and brotli:
                payload = brotli.compress(payload)
            elif encoding == 'gzip':
                payload = self._gzip(payload)
            elif encoding == 'deflate':
                payload = zlib.compress(payload)
            elif encoding == 'unsupported':
                # Raw body labelled with an encoding the client cannot decode
                payload = b'raw'
                break
            else:
                self._status(415)
                return
        self._respond(200, payload, [('Content-Encoding', encodings)])

    def do_POST(self):
        data = self._read_data()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('POST', data)
        elif self.path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data()
        if self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        if self.path in self._HTML_PATHS:
            self._respond(200, self._HTML_PAYLOAD, self._HTML_HEADERS)
        elif self.path == '/vid.mp4':
            self._respond(200, b'\x00\x00\x00\x00\x20\x66\x74[video]', [('Content-Type', 'video/mp4')])
        elif self.path.startswith('/redirect_'):
            self._redirect()
        elif self.path.startswith('/method'):
            self._method('GET')
        elif self.path.startswith('/headers'):
            self._headers()
        elif self.path == '/trailing_garbage':
            compressed = self._gzip(self._HTML_PAYLOAD) + b'trailing garbage'
            self._respond(200, compressed, [*self._HTML_HEADERS, ('Content-Encoding', 'gzip')])
        elif self.path == '/302-non-ascii-redirect':
            # NOTE: despite the path name, the status sent here is 301
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self._respond(301, headers=[('Location', new_url)])
        elif self.path == '/content-encoding':
            self._content_encoding()
        else:
            self._status(404)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        # Encoded as UTF-8 rather than the strict latin-1 of the base class
        self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
|
||||
|
||||
|
||||
class FakeLogger:
    """Logger stand-in whose methods accept a message and discard it."""

    def debug(self, msg):
        """Ignore a debug message."""

    def warning(self, msg):
        """Ignore a warning message."""

    def error(self, msg):
        """Ignore an error message."""
|
||||
|
||||
|
||||
class TestHTTP(unittest.TestCase):
|
||||
def setUp(self):
    # Starts one plain-HTTP and one HTTPS test server on ephemeral local
    # ports. Both are stopped and their sockets closed again after each test
    # through cleanups; cleanups also run when setUp itself fails part-way.
    def serve(httpd):
        # Run `httpd` in a daemon thread and register its shutdown
        thread = threading.Thread(target=httpd.serve_forever)
        thread.daemon = True
        thread.start()

        def stop():
            httpd.shutdown()
            thread.join()

        self.addCleanup(stop)
        return thread

    # HTTP server
    self.http_httpd = http.server.ThreadingHTTPServer(
        ('127.0.0.1', 0), HTTPTestRequestHandler)
    # Cleanups run last-in first-out: the socket is closed after `stop`
    self.addCleanup(self.http_httpd.server_close)
    self.http_port = http_server_port(self.http_httpd)
    self.http_server_thread = serve(self.http_httpd)

    # HTTPS server
    certfn = os.path.join(TEST_DIR, 'testcert.pem')
    self.https_httpd = http.server.ThreadingHTTPServer(
        ('127.0.0.1', 0), HTTPTestRequestHandler)
    self.addCleanup(self.https_httpd.server_close)
    sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    sslctx.load_cert_chain(certfn, None)
    self.https_httpd.socket = sslctx.wrap_socket(self.https_httpd.socket, server_side=True)
    self.https_port = http_server_port(self.https_httpd)
    self.https_server_thread = serve(self.https_httpd)
|
||||
|
||||
def test_nocheckcertificate(self):
    # The HTTPS test server's certificate is rejected by default and accepted
    # once `nocheckcertificate` is set.
    url = f'https://127.0.0.1:{self.https_port}/headers'

    with FakeYDL({'logger': FakeLogger()}) as ydl, self.assertRaises(urllib.error.URLError):
        ydl.urlopen(sanitized_Request(url))

    with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
        response = ydl.urlopen(sanitized_Request(url))
        self.assertEqual(response.status, 200)
        response.close()
|
||||
|
||||
def test_percent_encode(self):
    paths = (
        # Unicode characters should be encoded with uppercase percent-encoding
        '/中文.html',
        # don't normalize existing percent encodings
        '/%c7%9f',
    )
    with FakeYDL() as ydl:
        for path in paths:
            response = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}{path}'))
            self.assertEqual(response.status, 200)
            response.close()
|
||||
|
||||
def test_unicode_path_redirection(self):
    # A redirect to a non-ASCII Location must end up at the percent-encoded URL
    base_url = f'http://127.0.0.1:{self.http_port}'
    with FakeYDL() as ydl:
        response = ydl.urlopen(sanitized_Request(f'{base_url}/302-non-ascii-redirect'))
        self.assertEqual(response.url, f'{base_url}/%E4%B8%AD%E6%96%87.html')
        response.close()
|
||||
|
||||
def test_redirect(self):
    # Each case is (redirect status, request method, expected result), where
    # the result is (body, method seen by the server after following the redirect).
    expectations = (
        # A 303 must either use GET or HEAD for subsequent request
        (303, 'POST', ('', 'GET')),
        (303, 'HEAD', ('', 'HEAD')),
        (303, 'PUT', ('', 'GET')),
        # 301 and 302 turn POST only into a GET
        (301, 'POST', ('', 'GET')),
        (301, 'HEAD', ('', 'HEAD')),
        (302, 'POST', ('', 'GET')),
        (302, 'HEAD', ('', 'HEAD')),
        (301, 'PUT', ('testdata', 'PUT')),
        (302, 'PUT', ('testdata', 'PUT')),
        # 307 and 308 should not change method
        (307, 'POST', ('testdata', 'POST')),
        (308, 'POST', ('testdata', 'POST')),
        (307, 'PUT', ('testdata', 'PUT')),
        (308, 'PUT', ('testdata', 'PUT')),
        (307, 'HEAD', ('', 'HEAD')),
        (308, 'HEAD', ('', 'HEAD')),
    )

    with FakeYDL() as ydl:
        def do_req(redirect_status, method):
            body = b'testdata' if method in ('POST', 'PUT') else None
            request = sanitized_Request(
                f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=body)
            response = ydl.urlopen(request)
            return response.read().decode('utf-8'), response.headers.get('method', '')

        for redirect_status, method, expected in expectations:
            self.assertEqual(do_req(redirect_status, method), expected)

        # These should not redirect and instead raise an HTTPError
        for redirect_status in (300, 304, 305, 306):
            with self.assertRaises(urllib.error.HTTPError):
                do_req(redirect_status, 'GET')
|
||||
|
||||
def test_content_type(self):
    # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
    # Requests carrying form data must be sent with the urlencoded content
    # type; checked over https first, then over http.
    with FakeYDL({'nocheckcertificate': True}) as ydl:
        for scheme, port in (('https', self.https_port), ('http', self.http_port)):
            # method should be auto-detected as POST
            request = sanitized_Request(
                f'{scheme}://localhost:{port}/headers', data=urlencode_postdata({'test': 'test'}))
            echoed_headers = ydl.urlopen(request).read().decode('utf-8')
            self.assertIn('Content-Type: application/x-www-form-urlencoded', echoed_headers)
|
||||
|
||||
def test_cookiejar(self):
    # A cookie placed in the jar for the test server must be sent with the request
    test_cookie = http.cookiejar.Cookie(
        version=0, name='test', value='ytdlp', port=None, port_specified=False,
        domain='127.0.0.1', domain_specified=True, domain_initial_dot=False,
        path='/headers', path_specified=True, secure=False, expires=None,
        discard=False, comment=None, comment_url=None, rest={})
    with FakeYDL() as ydl:
        ydl.cookiejar.set_cookie(test_cookie)
        echoed_headers = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
        self.assertIn(b'Cookie: test=ytdlp', echoed_headers)
|
||||
|
||||
def test_no_compression_compat_header(self):
    # The `Youtubedl-no-compression` pseudo-header must become
    # `Accept-Encoding: identity` and must not reach the server itself.
    with FakeYDL() as ydl:
        request = sanitized_Request(
            f'http://127.0.0.1:{self.http_port}/headers',
            headers={'Youtubedl-no-compression': True})
        echoed_headers = ydl.urlopen(request).read()
        self.assertIn(b'Accept-Encoding: identity', echoed_headers)
        self.assertNotIn(b'youtubedl-no-compression', echoed_headers.lower())
|
||||
|
||||
def test_gzip_trailing_garbage(self):
    # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
    # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
    # Bytes following the gzip stream must be ignored when decoding the body
    with FakeYDL() as ydl:
        response = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage'))
        body = response.read().decode('utf-8')
        self.assertEqual(body, '<html><video src="/vid.mp4" /></html>')
|
||||
|
||||
@unittest.skipUnless(brotli, 'brotli support is not installed')
def test_brotli(self):
    """A 'br' encoded response is decoded while its Content-Encoding header is kept."""
    with FakeYDL() as ydl:
        request = sanitized_Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'br'})
        response = ydl.urlopen(request)
        self.assertEqual(response.headers.get('Content-Encoding'), 'br')
        self.assertEqual(response.read(), b'<html><video src="/vid.mp4" /></html>')
|
||||
|
||||
def test_deflate(self):
    """A 'deflate' encoded response is decoded while its Content-Encoding header is kept."""
    with FakeYDL() as ydl:
        request = sanitized_Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'deflate'})
        response = ydl.urlopen(request)
        self.assertEqual(response.headers.get('Content-Encoding'), 'deflate')
        self.assertEqual(response.read(), b'<html><video src="/vid.mp4" /></html>')
|
||||
|
||||
def test_gzip(self):
    """A 'gzip' encoded response is decoded while its Content-Encoding header is kept."""
    with FakeYDL() as ydl:
        request = sanitized_Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'gzip'})
        response = ydl.urlopen(request)
        self.assertEqual(response.headers.get('Content-Encoding'), 'gzip')
        self.assertEqual(response.read(), b'<html><video src="/vid.mp4" /></html>')
|
||||
|
||||
def test_multiple_encodings(self):
    """Responses carrying a list of content codings are fully decoded."""
    # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
    encoding_lists = ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate')
    with FakeYDL() as ydl:
        for encodings in encoding_lists:
            request = sanitized_Request(
                f'http://127.0.0.1:{self.http_port}/content-encoding',
                headers={'ytdl-encoding': encodings})
            response = ydl.urlopen(request)
            self.assertEqual(response.headers.get('Content-Encoding'), encodings)
            self.assertEqual(response.read(), b'<html><video src="/vid.mp4" /></html>')
|
||||
|
||||
def test_unsupported_encoding(self):
    """An unrecognised Content-Encoding leaves the body untouched."""
    # it should return the raw content
    with FakeYDL() as ydl:
        request = sanitized_Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': 'unsupported'})
        response = ydl.urlopen(request)
        self.assertEqual(response.headers.get('Content-Encoding'), 'unsupported')
        self.assertEqual(response.read(), b'raw')
|
||||
|
||||
|
||||
class TestClientCert(unittest.TestCase):
    """Check the client-certificate options against an HTTPS server that requires a client cert."""

    def setUp(self):
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        ca_cert = os.path.join(self.certdir, 'ca.crt')

        self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)

        # The server refuses any client that does not present a certificate
        # verifiable against the test CA.
        context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        context.verify_mode = ssl.CERT_REQUIRED
        context.load_verify_locations(cafile=ca_cert)
        context.load_cert_chain(server_cert, None)
        self.httpd.socket = context.wrap_socket(self.httpd.socket, server_side=True)

        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever, daemon=True)
        self.server_thread.start()

    def _run_test(self, **params):
        """Extract /video.html through a YoutubeDL built with the given certificate params."""
        ydl_opts = {
            'logger': FakeLogger(),
            # Disable client-side validation of unacceptable self-signed testcert.pem
            # The test is of a check on the server side, so unaffected
            'nocheckcertificate': True,
        }
        ydl_opts.update(params)
        ydl = YoutubeDL(ydl_opts)

        base_url = f'https://127.0.0.1:{self.port}'
        info = ydl.extract_info(f'{base_url}/video.html')
        self.assertEqual(info['url'], f'{base_url}/vid.mp4')

    def test_certificate_combined_nopass(self):
        combined = os.path.join(self.certdir, 'clientwithkey.crt')
        self._run_test(client_certificate=combined)

    def test_certificate_nocombined_nopass(self):
        certificate = os.path.join(self.certdir, 'client.crt')
        key = os.path.join(self.certdir, 'client.key')
        self._run_test(client_certificate=certificate, client_certificate_key=key)

    def test_certificate_combined_pass(self):
        combined = os.path.join(self.certdir, 'clientwithencryptedkey.crt')
        self._run_test(client_certificate=combined, client_certificate_password='foobar')

    def test_certificate_nocombined_pass(self):
        certificate = os.path.join(self.certdir, 'client.crt')
        encrypted_key = os.path.join(self.certdir, 'clientencrypted.key')
        self._run_test(
            client_certificate=certificate,
            client_certificate_key=encrypted_key,
            client_certificate_password='foobar')
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
    """Return an HTTP request handler class that identifies itself as proxy *name*.

    Every GET is answered with ``'<name>: <request path>'`` so a test can tell
    which proxy served a request and what it was asked for.
    """

    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
        proxy_name = name

        def log_message(self, format, *args):
            """Discard the request log output."""

        def do_GET(self):
            body = f'{self.proxy_name}: {self.path}'.encode()
            self.send_response(200)
            self.send_header('Content-Type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write(body)

    return HTTPTestRequestHandler
|
||||
|
||||
|
||||
class TestProxy(unittest.TestCase):
    """Check that requests are routed through the configured HTTP proxies."""

    @staticmethod
    def _start_proxy(name):
        """Start a background proxy server named *name*; return (server, port, thread)."""
        server = http.server.HTTPServer(('127.0.0.1', 0), _build_proxy_handler(name))
        port = http_server_port(server)
        thread = threading.Thread(target=server.serve_forever, daemon=True)
        thread.start()
        return server, port, thread

    def setUp(self):
        self.proxy, self.port, self.proxy_thread = self._start_proxy('normal')
        self.geo_proxy, self.geo_port, self.geo_proxy_thread = self._start_proxy('geo')

    def test_proxy(self):
        geo_proxy = f'127.0.0.1:{self.geo_port}'
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
            'geo_verification_proxy': geo_proxy,
        })
        url = 'http://foo.com/bar'

        # Without a per-request override the default proxy answers
        default_reply = ydl.urlopen(url).read().decode()
        self.assertEqual(default_reply, f'normal: {url}')

        # The Ytdl-request-proxy header selects the proxy for this request only
        request = urllib.request.Request(url, headers={'Ytdl-request-proxy': geo_proxy})
        geo_reply = ydl.urlopen(request).read().decode()
        self.assertEqual(geo_reply, f'geo: {url}')

    def test_proxy_with_idn(self):
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
        })
        reply = ydl.urlopen('http://中文.tw/').read().decode()
        # b'xn--fiq228c' is '中文'.encode('idna')
        self.assertEqual(reply, 'normal: http://xn--fiq228c.tw/')
|
||||
|
||||
|
||||
class TestFileURL(unittest.TestCase):
    """file:// URLs must be rejected unless 'enable_file_urls' is set."""

    # See https://github.com/ytdl-org/youtube-dl/issues/8227
    def test_file_urls(self):
        tf = tempfile.NamedTemporaryFile(delete=False)
        try:
            tf.write(b'foobar')
            tf.close()
            url = pathlib.Path(tf.name).as_uri()

            with FakeYDL() as ydl:
                self.assertRaisesRegex(
                    urllib.error.URLError, 'file:// URLs are explicitly disabled in yt-dlp for security reasons', ydl.urlopen, url)

            with FakeYDL({'enable_file_urls': True}) as ydl:
                res = ydl.urlopen(url)
                try:
                    self.assertEqual(res.read(), b'foobar')
                finally:
                    # Release the handle even when the assertion fails; an open
                    # handle can also prevent the unlink below on some platforms.
                    res.close()
        finally:
            # Always remove the temporary file, including on test failure.
            tf.close()
            os.unlink(tf.name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
1439
test/test_networking.py
Normal file
1439
test/test_networking.py
Normal file
File diff suppressed because it is too large
Load diff
282
test/test_networking_utils.py
Normal file
282
test/test_networking_utils.py
Normal file
|
@ -0,0 +1,282 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import platform
|
||||
import random
|
||||
import ssl
|
||||
import urllib.error
|
||||
import warnings
|
||||
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import certifi
|
||||
from yt_dlp.networking import Response
|
||||
from yt_dlp.networking._helper import (
|
||||
InstanceStoreMixin,
|
||||
add_accept_encoding_header,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
select_proxy,
|
||||
ssl_load_certs,
|
||||
)
|
||||
from yt_dlp.networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
_CompatHTTPError,
|
||||
)
|
||||
from yt_dlp.socks import ProxyType
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
class TestNetworkingUtils:
    """Tests for the helper functions in yt_dlp.networking._helper."""

    # (proxy URL, expected make_socks_proxy_opts() result)
    _SOCKS_PROXY_CASES = [
        ('socks5h://example.com', {
            'proxytype': ProxyType.SOCKS5,
            'addr': 'example.com',
            'port': 1080,
            'rdns': True,
            'username': None,
            'password': None,
        }),
        ('socks5://user:@example.com:5555', {
            'proxytype': ProxyType.SOCKS5,
            'addr': 'example.com',
            'port': 5555,
            'rdns': False,
            'username': 'user',
            'password': '',
        }),
        ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', {
            'proxytype': ProxyType.SOCKS4,
            'addr': '127.0.0.1',
            'port': 1080,
            'rdns': False,
            'username': 'u@ser',
            'password': 'pa ss',
        }),
        ('socks4a://:pa%20ss@127.0.0.1', {
            'proxytype': ProxyType.SOCKS4A,
            'addr': '127.0.0.1',
            'port': 1080,
            'rdns': True,
            'username': '',
            'password': 'pa ss',
        }),
    ]

    # (request method, redirect status, expected method of the follow-up request)
    _REDIRECT_METHOD_CASES = [
        ('GET', 303, 'GET'),
        ('HEAD', 303, 'HEAD'),
        ('PUT', 303, 'GET'),
        ('POST', 301, 'GET'),
        ('HEAD', 301, 'HEAD'),
        ('POST', 302, 'GET'),
        ('HEAD', 302, 'HEAD'),
        ('PUT', 302, 'PUT'),
        ('POST', 308, 'POST'),
        ('POST', 307, 'POST'),
        ('HEAD', 308, 'HEAD'),
        ('HEAD', 307, 'HEAD'),
    ]

    # (initial headers, supported encodings, expected headers afterwards)
    _ACCEPT_ENCODING_CASES = [
        ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
        ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
        ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
    ]

    def test_select_proxy(self):
        proxy_map = {
            'all': 'socks5://example.com',
            'http': 'http://example.com:1080',
            'no': 'bypass.example.com,yt-dl.org',
        }

        # Scheme-specific entry wins, 'all' is the fallback
        assert select_proxy('https://example.com', proxy_map) == proxy_map['all']
        assert select_proxy('http://example.com', proxy_map) == proxy_map['http']
        # Hosts listed under 'no' bypass the proxy entirely
        assert select_proxy('http://bypass.example.com', proxy_map) is None
        assert select_proxy('https://yt-dl.org', proxy_map) is None

    @pytest.mark.parametrize('socks_proxy,expected', _SOCKS_PROXY_CASES)
    def test_make_socks_proxy_opts(self, socks_proxy, expected):
        assert make_socks_proxy_opts(socks_proxy) == expected

    def test_make_socks_proxy_unknown(self):
        with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
            make_socks_proxy_opts('socks://127.0.0.1')

    @pytest.mark.skipif(not certifi, reason='certifi is not installed')
    def test_load_certifi(self):
        def new_client_context():
            return ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)

        # use_certifi=True must load exactly the certifi bundle
        expected_certifi = new_client_context()
        expected_certifi.load_verify_locations(cafile=certifi.where())
        loaded_with_certifi = new_client_context()
        ssl_load_certs(loaded_with_certifi, use_certifi=True)
        assert loaded_with_certifi.get_ca_certs() == expected_certifi.get_ca_certs()

        # use_certifi=False must load exactly the default certificates
        expected_default = new_client_context()
        expected_default.load_default_certs()
        loaded_without_certifi = new_client_context()
        ssl_load_certs(loaded_without_certifi, use_certifi=False)
        assert loaded_without_certifi.get_ca_certs() == expected_default.get_ca_certs()

        if expected_default.get_ca_certs() == expected_certifi.get_ca_certs():
            pytest.skip('System uses certifi as default. The test is not valid')

    @pytest.mark.parametrize('method,status,expected', _REDIRECT_METHOD_CASES)
    def test_get_redirect_method(self, method, status, expected):
        assert get_redirect_method(method, status) == expected

    @pytest.mark.parametrize('headers,supported_encodings,expected', _ACCEPT_ENCODING_CASES)
    def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
        header_dict = HTTPHeaderDict(headers)
        add_accept_encoding_header(header_dict, supported_encodings)
        assert header_dict == HTTPHeaderDict(expected)
|
||||
|
||||
|
||||
class TestInstanceStoreMixin:
    """Tests for the keyword-argument keyed instance cache of InstanceStoreMixin."""

    class FakeInstanceStoreMixin(InstanceStoreMixin):
        """Store whose "instances" are random integers.

        Two lookups compare equal only if the second one was served from the
        store (barring a random collision), which makes cache hits observable.
        """

        def _create_instance(self, **kwargs):
            return random.randint(0, 1000000)

        def _close_instance(self, instance):
            pass

    def test_mixin(self):
        mixin = self.FakeInstanceStoreMixin()

        # Identical (nested, unhashable) kwargs hit the same instance
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        # A differing nested value yields a different instance
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        # A differing nested key yields a different instance.
        # The closing parenthesis used to sit after the second call, so the
        # comparison was evaluated as the `d` argument and nothing was compared.
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}})

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])

        cookiejar = YoutubeDLCookieJar()
        assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)

        # A distinct cookiejar object is a different key
        assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())

        # Different order
        assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)

        # Clearing the store forces a fresh instance for the same kwargs
        m = mixin._get_instance(t=1234)
        assert mixin._get_instance(t=1234) == m
        mixin._clear_instances()
        assert mixin._get_instance(t=1234) != m
|
||||
|
||||
|
||||
class TestNetworkingExceptions:
    """Tests for HTTPError, its urllib compatibility wrapper, and IncompleteRead."""

    @staticmethod
    def create_response(status):
        """Build a Response with body b'test' and the given status code."""
        return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
    def test_http_error(self, http_error_class):
        response = self.create_response(403)
        error = http_error_class(response)

        expected_message = 'HTTP Error 403: Forbidden'
        assert error.status == 403
        assert str(error) == error.msg
        assert error.msg == expected_message
        assert error.reason == response.reason
        assert error.response is response

        body = error.response.read()
        assert body == b'test'
        assert repr(error) == '<HTTPError 403: Forbidden>'

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
    def test_redirect_http_error(self, http_error_class):
        error = http_error_class(self.create_response(301), redirect_loop=True)
        expected_message = 'HTTP Error 301: Moved Permanently (redirect loop detected)'
        assert str(error) == error.msg
        assert error.msg == expected_message
        assert error.reason == 'Moved Permanently'

    def test_compat_http_error(self):
        response = self.create_response(403)
        error = _CompatHTTPError(HTTPError(response))
        assert isinstance(error, HTTPError)
        assert isinstance(error, urllib.error.HTTPError)

        @contextlib.contextmanager
        def raises_deprecation_warning():
            """Fail unless the wrapped code emits exactly one DeprecationWarning."""
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter('always')
                yield

                if not caught:
                    pytest.fail('Did not raise DeprecationWarning')
                if len(caught) > 1:
                    pytest.fail(f'Raised multiple warnings: {caught}')

                category = caught[-1].category
                if not issubclass(category, DeprecationWarning):
                    pytest.fail(f'Expected DeprecationWarning, got {category}')
                caught.clear()

        # Evaluated in order, each inside its own warning check
        deprecated_checks = [
            lambda: error.code == 403,
            lambda: error.getcode() == 403,
            lambda: error.hdrs is error.response.headers,
            lambda: error.info() is error.response.headers,
            lambda: error.headers is error.response.headers,
            lambda: error.filename == error.response.url,
            lambda: error.url == error.response.url,
            lambda: error.geturl() == error.response.url,
            # Passthrough file operations
            lambda: error.read() == b'test',
            lambda: not error.closed,
            # Technically Response operations are also passed through, which should not be used.
            lambda: error.get_header('test') == 'test',
        ]
        for check in deprecated_checks:
            with raises_deprecation_warning():
                assert check()

        # Should not raise a warning
        error.close()

    @pytest.mark.skipif(
        platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
    def test_compat_http_error_autoclose(self):
        # Compat HTTPError should not autoclose response
        response = self.create_response(403)
        _CompatHTTPError(HTTPError(response))
        assert not response.closed

    def test_incomplete_read_error(self):
        with_expected = IncompleteRead(4, 3, cause='test')
        assert isinstance(with_expected, IncompleteRead)
        assert repr(with_expected) == '<IncompleteRead: 4 bytes read, 3 more expected>'
        assert str(with_expected) == with_expected.msg
        assert with_expected.msg == '4 bytes read, 3 more expected'
        assert with_expected.partial == 4
        assert with_expected.expected == 3
        assert with_expected.cause == 'test'

        without_expected = IncompleteRead(3)
        assert repr(without_expected) == '<IncompleteRead: 3 bytes read>'
        assert str(without_expected) == '3 bytes read'
|
|
@ -1,113 +1,450 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import abc
|
||||
import contextlib
|
||||
import enum
|
||||
import functools
|
||||
import http.server
|
||||
import json
|
||||
import random
|
||||
import subprocess
|
||||
import urllib.request
|
||||
import socket
|
||||
import struct
|
||||
import time
|
||||
from socketserver import (
|
||||
BaseRequestHandler,
|
||||
StreamRequestHandler,
|
||||
ThreadingTCPServer,
|
||||
)
|
||||
|
||||
from test.helper import FakeYDL, get_params, is_download_test
|
||||
from test.helper import http_server_port
|
||||
from yt_dlp.networking import Request
|
||||
from yt_dlp.networking.exceptions import ProxyError, TransportError
|
||||
from yt_dlp.socks import (
|
||||
SOCKS4_REPLY_VERSION,
|
||||
SOCKS4_VERSION,
|
||||
SOCKS5_USER_AUTH_SUCCESS,
|
||||
SOCKS5_USER_AUTH_VERSION,
|
||||
SOCKS5_VERSION,
|
||||
Socks5AddressType,
|
||||
Socks5Auth,
|
||||
)
|
||||
|
||||
SOCKS5_USER_AUTH_FAILURE = 0x1
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestMultipleSocks(unittest.TestCase):
|
||||
@staticmethod
|
||||
def _check_params(attrs):
|
||||
params = get_params()
|
||||
for attr in attrs:
|
||||
if attr not in params:
|
||||
print('Missing %s. Skipping.' % attr)
|
||||
class Socks4CD(enum.IntEnum):
    """Result codes sent in the CD field of a SOCKS4 reply."""

    REQUEST_GRANTED = 0x5A
    REQUEST_REJECTED_OR_FAILED = 0x5B
    REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD = 0x5C
    REQUEST_REJECTED_DIFFERENT_USERID = 0x5D
|
||||
|
||||
|
||||
class Socks5Reply(enum.IntEnum):
    """Values of the REP field in a SOCKS5 reply."""

    SUCCEEDED = 0
    GENERAL_FAILURE = 1
    CONNECTION_NOT_ALLOWED = 2
    NETWORK_UNREACHABLE = 3
    HOST_UNREACHABLE = 4
    CONNECTION_REFUSED = 5
    TTL_EXPIRED = 6
    COMMAND_NOT_SUPPORTED = 7
    ADDRESS_TYPE_NOT_SUPPORTED = 8
|
||||
|
||||
|
||||
class SocksTestRequestHandler(BaseRequestHandler):
    """Request handler base that carries the SOCKS negotiation details.

    ``socks_info`` is whatever the proxy handler passes in (``None`` by default).
    """

    def __init__(self, *args, socks_info=None, **kwargs):
        # Store before delegating: the base constructor already runs the
        # request handling, which needs to see this attribute.
        self.socks_info = socks_info
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class SocksProxyHandler(BaseRequestHandler):
    """Base for the SOCKS proxy handlers.

    Keeps the handler class to hand the request over to and the per-server
    options; a falsy ``socks_server_kwargs`` is stored as an empty dict.
    """

    def __init__(self, request_handler_class, socks_server_kwargs, *args, **kwargs):
        # Both attributes must exist before the base constructor runs handle()
        self.socks_kwargs = socks_server_kwargs if socks_server_kwargs else {}
        self.request_handler_class = request_handler_class
        super().__init__(*args, **kwargs)
|
||||
|
||||
|
||||
class Socks5ProxyHandler(StreamRequestHandler, SocksProxyHandler):
|
||||
|
||||
# SOCKS5 protocol https://tools.ietf.org/html/rfc1928
|
||||
# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929
|
||||
|
||||
def handle(self):
|
||||
sleep = self.socks_kwargs.get('sleep')
|
||||
if sleep:
|
||||
time.sleep(sleep)
|
||||
version, nmethods = self.connection.recv(2)
|
||||
assert version == SOCKS5_VERSION
|
||||
methods = list(self.connection.recv(nmethods))
|
||||
|
||||
auth = self.socks_kwargs.get('auth')
|
||||
|
||||
if auth is not None and Socks5Auth.AUTH_USER_PASS not in methods:
|
||||
self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE))
|
||||
self.server.close_request(self.request)
|
||||
return
|
||||
|
||||
elif Socks5Auth.AUTH_USER_PASS in methods:
|
||||
self.connection.sendall(struct.pack("!BB", SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS))
|
||||
|
||||
_, user_len = struct.unpack('!BB', self.connection.recv(2))
|
||||
username = self.connection.recv(user_len).decode()
|
||||
pass_len = ord(self.connection.recv(1))
|
||||
password = self.connection.recv(pass_len).decode()
|
||||
|
||||
if username == auth[0] and password == auth[1]:
|
||||
self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_SUCCESS))
|
||||
else:
|
||||
self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_FAILURE))
|
||||
self.server.close_request(self.request)
|
||||
return
|
||||
return params
|
||||
|
||||
def test_proxy_http(self):
|
||||
params = self._check_params(['primary_proxy', 'primary_server_ip'])
|
||||
if params is None:
|
||||
return
|
||||
ydl = FakeYDL({
|
||||
'proxy': params['primary_proxy']
|
||||
})
|
||||
self.assertEqual(
|
||||
ydl.urlopen('http://yt-dl.org/ip').read().decode(),
|
||||
params['primary_server_ip'])
|
||||
|
||||
def test_proxy_https(self):
|
||||
params = self._check_params(['primary_proxy', 'primary_server_ip'])
|
||||
if params is None:
|
||||
return
|
||||
ydl = FakeYDL({
|
||||
'proxy': params['primary_proxy']
|
||||
})
|
||||
self.assertEqual(
|
||||
ydl.urlopen('https://yt-dl.org/ip').read().decode(),
|
||||
params['primary_server_ip'])
|
||||
|
||||
def test_secondary_proxy_http(self):
|
||||
params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
|
||||
if params is None:
|
||||
return
|
||||
ydl = FakeYDL()
|
||||
req = urllib.request.Request('http://yt-dl.org/ip')
|
||||
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
|
||||
self.assertEqual(
|
||||
ydl.urlopen(req).read().decode(),
|
||||
params['secondary_server_ip'])
|
||||
|
||||
def test_secondary_proxy_https(self):
|
||||
params = self._check_params(['secondary_proxy', 'secondary_server_ip'])
|
||||
if params is None:
|
||||
return
|
||||
ydl = FakeYDL()
|
||||
req = urllib.request.Request('https://yt-dl.org/ip')
|
||||
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
|
||||
self.assertEqual(
|
||||
ydl.urlopen(req).read().decode(),
|
||||
params['secondary_server_ip'])
|
||||
|
||||
|
||||
@is_download_test
|
||||
class TestSocks(unittest.TestCase):
|
||||
_SKIP_SOCKS_TEST = True
|
||||
|
||||
def setUp(self):
|
||||
if self._SKIP_SOCKS_TEST:
|
||||
elif Socks5Auth.AUTH_NONE in methods:
|
||||
self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NONE))
|
||||
else:
|
||||
self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE))
|
||||
self.server.close_request(self.request)
|
||||
return
|
||||
|
||||
self.port = random.randint(20000, 30000)
|
||||
self.server_process = subprocess.Popen([
|
||||
'srelay', '-f', '-i', '127.0.0.1:%d' % self.port],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
version, command, _, address_type = struct.unpack('!BBBB', self.connection.recv(4))
|
||||
socks_info = {
|
||||
'version': version,
|
||||
'auth_methods': methods,
|
||||
'command': command,
|
||||
'client_address': self.client_address,
|
||||
'ipv4_address': None,
|
||||
'domain_address': None,
|
||||
'ipv6_address': None,
|
||||
}
|
||||
if address_type == Socks5AddressType.ATYP_IPV4:
|
||||
socks_info['ipv4_address'] = socket.inet_ntoa(self.connection.recv(4))
|
||||
elif address_type == Socks5AddressType.ATYP_DOMAINNAME:
|
||||
socks_info['domain_address'] = self.connection.recv(ord(self.connection.recv(1))).decode()
|
||||
elif address_type == Socks5AddressType.ATYP_IPV6:
|
||||
socks_info['ipv6_address'] = socket.inet_ntop(socket.AF_INET6, self.connection.recv(16))
|
||||
else:
|
||||
self.server.close_request(self.request)
|
||||
|
||||
def tearDown(self):
|
||||
if self._SKIP_SOCKS_TEST:
|
||||
socks_info['port'] = struct.unpack('!H', self.connection.recv(2))[0]
|
||||
|
||||
# dummy response, the returned IP is just a placeholder
|
||||
self.connection.sendall(struct.pack(
|
||||
'!BBBBIH', SOCKS5_VERSION, self.socks_kwargs.get('reply', Socks5Reply.SUCCEEDED), 0x0, 0x1, 0x7f000001, 40000))
|
||||
|
||||
self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
|
||||
|
||||
|
||||
class Socks4ProxyHandler(StreamRequestHandler, SocksProxyHandler):
|
||||
|
||||
# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol
|
||||
# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol
|
||||
|
||||
def _read_until_null(self):
|
||||
return b''.join(iter(functools.partial(self.connection.recv, 1), b'\x00'))
|
||||
|
||||
def handle(self):
|
||||
sleep = self.socks_kwargs.get('sleep')
|
||||
if sleep:
|
||||
time.sleep(sleep)
|
||||
socks_info = {
|
||||
'version': SOCKS4_VERSION,
|
||||
'command': None,
|
||||
'client_address': self.client_address,
|
||||
'ipv4_address': None,
|
||||
'port': None,
|
||||
'domain_address': None,
|
||||
}
|
||||
version, command, dest_port, dest_ip = struct.unpack('!BBHI', self.connection.recv(8))
|
||||
socks_info['port'] = dest_port
|
||||
socks_info['command'] = command
|
||||
if version != SOCKS4_VERSION:
|
||||
self.server.close_request(self.request)
|
||||
return
|
||||
use_remote_dns = False
|
||||
if 0x0 < dest_ip <= 0xFF:
|
||||
use_remote_dns = True
|
||||
else:
|
||||
socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack("!I", dest_ip))
|
||||
|
||||
user_id = self._read_until_null().decode()
|
||||
if user_id != (self.socks_kwargs.get('user_id') or ''):
|
||||
self.connection.sendall(struct.pack(
|
||||
'!BBHI', SOCKS4_REPLY_VERSION, Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, 0x00, 0x00000000))
|
||||
self.server.close_request(self.request)
|
||||
return
|
||||
|
||||
self.server_process.terminate()
|
||||
self.server_process.communicate()
|
||||
if use_remote_dns:
|
||||
socks_info['domain_address'] = self._read_until_null().decode()
|
||||
|
||||
def _get_ip(self, protocol):
|
||||
if self._SKIP_SOCKS_TEST:
|
||||
return '127.0.0.1'
|
||||
# dummy response, the returned IP is just a placeholder
|
||||
self.connection.sendall(
|
||||
struct.pack(
|
||||
'!BBHI', SOCKS4_REPLY_VERSION,
|
||||
self.socks_kwargs.get('cd_reply', Socks4CD.REQUEST_GRANTED), 40000, 0x7f000001))
|
||||
|
||||
ydl = FakeYDL({
|
||||
'proxy': '%s://127.0.0.1:%d' % (protocol, self.port),
|
||||
})
|
||||
return ydl.urlopen('http://yt-dl.org/ip').read().decode()
|
||||
self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info)
|
||||
|
||||
def test_socks4(self):
|
||||
self.assertTrue(isinstance(self._get_ip('socks4'), str))
|
||||
|
||||
def test_socks4a(self):
|
||||
self.assertTrue(isinstance(self._get_ip('socks4a'), str))
|
||||
class IPv6ThreadingTCPServer(ThreadingTCPServer):
    """ThreadingTCPServer that listens on an IPv6 socket."""

    address_family = socket.AF_INET6
|
||||
|
||||
def test_socks5(self):
|
||||
self.assertTrue(isinstance(self._get_ip('socks5'), str))
|
||||
|
||||
class SocksHTTPTestRequestHandler(http.server.BaseHTTPRequestHandler, SocksTestRequestHandler):
    """HTTP endpoint served behind the test SOCKS proxy.

    ``GET /socks_info`` returns the recorded SOCKS negotiation details as JSON.
    """

    def do_GET(self):
        if self.path != '/socks_info':
            return

        # json.dumps emits ASCII only by default, so byte and character counts agree
        body = json.dumps(self.socks_info.copy()).encode()
        self.send_response(200)
        self.send_header('Content-Type', 'application/json; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
|
||||
|
||||
|
||||
@contextlib.contextmanager
def socks_server(socks_server_class, request_handler, bind_ip=None, **socks_server_kwargs):
    """Run a SOCKS test proxy in a background thread for the duration of the context.

    @param socks_server_class   Proxy handler class; instantiated per connection as
                                socks_server_class(request_handler, socks_server_kwargs, ...)
    @param request_handler      Handler class passed through to the proxy handler
    @param bind_ip              Address to bind to (default '127.0.0.1'). An address
                                without a '.' is treated as IPv6.
    @param socks_server_kwargs  Options passed through to the proxy handler

    Yields the proxy address as 'host:port', or '[host]:port' for IPv6.
    """
    server = server_thread = None
    serving = False
    try:
        bind_address = bind_ip or '127.0.0.1'
        is_ipv4 = '.' in bind_address
        server_type = ThreadingTCPServer if is_ipv4 else IPv6ThreadingTCPServer
        server = server_type(
            (bind_address, 0), functools.partial(socks_server_class, request_handler, socks_server_kwargs))
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        serving = True
        if is_ipv4:
            yield f'{bind_address}:{server_port}'
        else:
            yield f'[{bind_address}]:{server_port}'
    finally:
        # Tear down only what was actually set up, so that a failure while
        # creating the server (e.g. IPv6 being unavailable) surfaces as itself
        # instead of being masked by an AttributeError on None.
        # shutdown() must not be called unless serve_forever() is running:
        # it waits for the serving loop to finish and would block otherwise.
        if serving:
            server.shutdown()
        if server is not None:
            server.server_close()
        if serving:
            server_thread.join(2.0)
|
||||
|
||||
|
||||
class SocksProxyTestContext(abc.ABC):
    """Protocol-specific glue between a SOCKS test and the target server behind the proxy."""

    # Handler class run behind the proxy; set by subclasses
    REQUEST_HANDLER_CLASS = None

    def socks_server(self, server_class, *args, **kwargs):
        """Start a SOCKS proxy that hands requests to REQUEST_HANDLER_CLASS."""
        target_handler = self.REQUEST_HANDLER_CLASS
        return socks_server(server_class, target_handler, *args, **kwargs)

    @abc.abstractmethod
    def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """return a dict of socks_info"""
|
||||
|
||||
|
||||
class HTTPSocksTestProxyContext(SocksProxyTestContext):
    """Context that queries the socks_info over plain HTTP."""

    REQUEST_HANDLER_CLASS = SocksHTTPTestRequestHandler

    def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request /socks_info from the target through *handler* and return the decoded JSON."""
        host = target_domain or '127.0.0.1'
        port = target_port or '40000'
        request = Request(f'http://{host}:{port}/socks_info', **req_kwargs)
        handler.validate(request)
        raw_body = handler.send(request).read()
        return json.loads(raw_body.decode())
|
||||
|
||||
|
||||
CTX_MAP = {
|
||||
'http': HTTPSocksTestProxyContext,
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def ctx(request):
    """Instantiate the test context registered in CTX_MAP under the fixture parameter."""
    context_class = CTX_MAP[request.param]
    return context_class()
|
||||
|
||||
|
||||
class TestSocks4Proxy:
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
def test_socks4_no_auth(self, handler, ctx):
    """A per-request socks4:// proxy without credentials is used for the request."""
    with handler() as rh, ctx.socks_server(Socks4ProxyHandler) as server_address:
        proxies = {'all': f'socks4://{server_address}'}
        socks_info = ctx.socks_info_request(rh, proxies=proxies)
        assert socks_info['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
def test_socks4_auth(self, handler, ctx):
    """A proxy expecting user id 'user' rejects anonymous requests and accepts matching ones."""
    with handler() as rh, ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
        anonymous_proxies = {'all': f'socks4://{server_address}'}
        with pytest.raises(ProxyError):
            ctx.socks_info_request(rh, proxies=anonymous_proxies)

        identified_proxies = {'all': f'socks4://user:@{server_address}'}
        socks_info = ctx.socks_info_request(rh, proxies=identified_proxies)
        assert socks_info['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
def test_socks4a_ipv4_target(self, handler, ctx):
    """An IPv4 target via socks4a arrives either as an address or as a domain, not both."""
    with ctx.socks_server(Socks4ProxyHandler) as server_address:
        proxies = {'all': f'socks4a://{server_address}'}
        with handler(proxies=proxies) as rh:
            socks_info = ctx.socks_info_request(rh, target_domain='127.0.0.1')
            assert socks_info['version'] == 4
            sent_as_ipv4 = socks_info['ipv4_address'] == '127.0.0.1'
            sent_as_domain = socks_info['domain_address'] == '127.0.0.1'
            # Exactly one of the two representations must have been used
            assert sent_as_ipv4 != sent_as_domain
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
def test_socks4a_domain_target(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
|
||||
response = ctx.socks_info_request(rh, target_domain='localhost')
|
||||
assert response['version'] == 4
|
||||
assert response['ipv4_address'] is None
|
||||
assert response['domain_address'] == 'localhost'
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
with handler(proxies={'all': f'socks4://{server_address}'},
|
||||
source_address=source_address) as rh:
|
||||
response = ctx.socks_info_request(rh)
|
||||
assert response['client_address'][0] == source_address
|
||||
assert response['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
@pytest.mark.parametrize('reply_code', [
|
||||
Socks4CD.REQUEST_REJECTED_OR_FAILED,
|
||||
Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
|
||||
Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID,
|
||||
])
|
||||
def test_socks4_errors(self, handler, ctx, reply_code):
|
||||
with ctx.socks_server(Socks4ProxyHandler, cd_reply=reply_code) as server_address:
|
||||
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
|
||||
with pytest.raises(ProxyError):
|
||||
ctx.socks_info_request(rh)
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
def test_ipv6_socks4_proxy(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
|
||||
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
|
||||
response = ctx.socks_info_request(rh, target_domain='127.0.0.1')
|
||||
assert response['client_address'][0] == '::1'
|
||||
assert response['ipv4_address'] == '127.0.0.1'
|
||||
assert response['version'] == 4
|
||||
|
||||
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
|
||||
def test_timeout(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
|
||||
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
|
||||
with pytest.raises(TransportError):
|
||||
ctx.socks_info_request(rh)
|
||||
|
||||
|
||||
class TestSocks5Proxy:
    """SOCKS5 / SOCKS5h tests run against a test server started via ``ctx.socks_server``."""

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_no_auth(self, handler, ctx):
        """Without credentials the server sees only auth method ``0x0``."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh)
                assert info['auth_methods'] == [0x0]
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_user_pass(self, handler, ctx):
        """With server auth configured: no credentials raises ProxyError, matching credentials succeed."""
        with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as proxy_addr, handler() as rh:
            with pytest.raises(ProxyError):
                ctx.socks_info_request(rh, proxies={'all': f'socks5://{proxy_addr}'})

            authed_proxies = {'all': f'socks5://test:testpass@{proxy_addr}'}
            info = ctx.socks_info_request(rh, proxies=authed_proxies)

            assert info['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
            assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_ipv4_target(self, handler, ctx):
        """An IPv4 literal target is reported in ``ipv4_address``."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert info['ipv4_address'] == '127.0.0.1'
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_domain_target(self, handler, ctx):
        """For target ``localhost`` over socks5, exactly one of the IPv4 / IPv6 loopback checks holds."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh, target_domain='localhost')
                got_ipv4_loopback = info['ipv4_address'] == '127.0.0.1'
                got_ipv6_loopback = info['ipv6_address'] == '::1'
                assert got_ipv4_loopback != got_ipv6_loopback
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5h_domain_target(self, handler, ctx):
        """For target ``localhost`` over socks5h, the domain field is set and the IPv4 field is None."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            proxies = {'all': f'socks5h://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh, target_domain='localhost')
                assert info['ipv4_address'] is None
                assert info['domain_address'] == 'localhost'
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5h_ip_target(self, handler, ctx):
        """For an IPv4 literal over socks5h, the IPv4 field is set and the domain field is None."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            proxies = {'all': f'socks5h://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert info['ipv4_address'] == '127.0.0.1'
                assert info['domain_address'] is None
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_socks5_ipv6_destination(self, handler, ctx):
        """A bracketed IPv6 literal target is reported in ``ipv6_address``."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh, target_domain='[::1]')
                assert info['ipv6_address'] == '::1'
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv6_socks5_proxy(self, handler, ctx):
        """With the proxy bound to ``::1``, the client is seen as ``::1`` and the IPv4 target is reported."""
        with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies) as rh:
                info = ctx.socks_info_request(rh, target_domain='127.0.0.1')
                assert info['client_address'][0] == '::1'
                assert info['ipv4_address'] == '127.0.0.1'
                assert info['version'] == 5

    # XXX: is there any feasible way of testing IPv6 source addresses?
    # Same would go for non-proxy source_address test...
    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_ipv4_client_source_address(self, handler, ctx):
        """The handler's ``source_address`` is the client address reported by the server."""
        with ctx.socks_server(Socks5ProxyHandler) as proxy_addr:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            handler_kwargs = {
                'proxies': {'all': f'socks5://{proxy_addr}'},
                'source_address': source_address,
            }
            with handler(**handler_kwargs) as rh:
                info = ctx.socks_info_request(rh)
                assert info['client_address'][0] == source_address
                assert info['version'] == 5

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    @pytest.mark.parametrize('reply_code', [
        Socks5Reply.GENERAL_FAILURE,
        Socks5Reply.CONNECTION_NOT_ALLOWED,
        Socks5Reply.NETWORK_UNREACHABLE,
        Socks5Reply.HOST_UNREACHABLE,
        Socks5Reply.CONNECTION_REFUSED,
        Socks5Reply.TTL_EXPIRED,
        Socks5Reply.COMMAND_NOT_SUPPORTED,
        Socks5Reply.ADDRESS_TYPE_NOT_SUPPORTED,
    ])
    def test_socks5_errors(self, handler, ctx, reply_code):
        """Each parametrized ``reply`` code makes the request raise ProxyError."""
        with ctx.socks_server(Socks5ProxyHandler, reply=reply_code) as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies) as rh, pytest.raises(ProxyError):
                ctx.socks_info_request(rh)

    @pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
    def test_timeout(self, handler, ctx):
        """Server started with ``sleep=2`` and handler ``timeout=1``: expects TransportError."""
        with ctx.socks_server(Socks5ProxyHandler, sleep=2) as proxy_addr:
            proxies = {'all': f'socks5://{proxy_addr}'}
            with handler(proxies=proxies, timeout=1) as rh, pytest.raises(TransportError):
                ctx.socks_info_request(rh)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -14,6 +14,7 @@ import contextlib
|
|||
import io
|
||||
import itertools
|
||||
import json
|
||||
import subprocess
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from yt_dlp.compat import (
|
||||
|
@ -28,6 +29,7 @@ from yt_dlp.utils import (
|
|||
InAdvancePagedList,
|
||||
LazyList,
|
||||
OnDemandPagedList,
|
||||
Popen,
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
base_url,
|
||||
|
@ -47,10 +49,9 @@ from yt_dlp.utils import (
|
|||
encode_base_n,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
escape_rfc3986,
|
||||
escape_url,
|
||||
expand_path,
|
||||
extract_attributes,
|
||||
extract_basic_auth,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
|
@ -103,7 +104,6 @@ from yt_dlp.utils import (
|
|||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
|
@ -132,6 +132,12 @@ from yt_dlp.utils import (
|
|||
xpath_text,
|
||||
xpath_with_ns,
|
||||
)
|
||||
from yt_dlp.utils.networking import (
|
||||
HTTPHeaderDict,
|
||||
escape_rfc3986,
|
||||
normalize_url,
|
||||
remove_dot_segments,
|
||||
)
|
||||
|
||||
|
||||
class TestUtil(unittest.TestCase):
|
||||
|
@ -258,15 +264,6 @@ class TestUtil(unittest.TestCase):
|
|||
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
|
||||
self.assertEqual(sanitize_url('foo bar'), 'foo bar')
|
||||
|
||||
def test_extract_basic_auth(self):
|
||||
auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
|
||||
self.assertFalse(auth_header('http://foo.bar'))
|
||||
self.assertFalse(auth_header('http://:foo.bar'))
|
||||
self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
|
||||
self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
|
||||
self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
|
||||
self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
|
||||
|
||||
def test_expand_path(self):
|
||||
def env(var):
|
||||
return f'%{var}%' if sys.platform == 'win32' else f'${var}'
|
||||
|
@ -668,6 +665,8 @@ class TestUtil(unittest.TestCase):
|
|||
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
|
||||
self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
|
||||
self.assertEqual(parse_duration('103:050'), 103.05)
|
||||
self.assertEqual(parse_duration('1HR 3MIN'), 3780)
|
||||
self.assertEqual(parse_duration('2hrs 3mins'), 7380)
|
||||
|
||||
def test_fix_xml_ampersands(self):
|
||||
self.assertEqual(
|
||||
|
@ -944,24 +943,45 @@ class TestUtil(unittest.TestCase):
|
|||
self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar')
|
||||
self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar')
|
||||
|
||||
def test_escape_url(self):
|
||||
def test_normalize_url(self):
|
||||
self.assertEqual(
|
||||
escape_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
|
||||
normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'),
|
||||
'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
|
||||
normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'),
|
||||
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/фрагмент'),
|
||||
normalize_url('http://тест.рф/фрагмент'),
|
||||
'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
|
||||
)
|
||||
self.assertEqual(
|
||||
escape_url('http://тест.рф/абв?абв=абв#абв'),
|
||||
normalize_url('http://тест.рф/абв?абв=абв#абв'),
|
||||
'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
|
||||
)
|
||||
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||
self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
|
||||
|
||||
self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html')
|
||||
|
||||
def test_remove_dot_segments(self):
    """Check ``remove_dot_segments`` against a table of (input path, expected path) pairs."""
    expectations = (
        ('/a/b/c/./../../g', '/a/g'),
        ('mid/content=5/../6', 'mid/6'),
        ('/ad/../cd', '/cd'),
        ('/ad/../cd/', '/cd/'),
        ('/..', '/'),
        ('/./', '/'),
        ('/./a', '/a'),
        ('/abc/./.././d/././e/.././f/./../../ghi', '/ghi'),
        ('/', '/'),
        ('/t', '/t'),
        ('t', 't'),
        ('', ''),
        ('/../a/b/c', '/a/b/c'),
        ('../a', 'a'),
        ('./a', 'a'),
        ('.', ''),
        ('////', '////'),
    )
    # Cases run in the listed order; the first mismatch fails the test
    for path, expected in expectations:
        self.assertEqual(remove_dot_segments(path), expected)
|
||||
|
||||
def test_js_to_json_vars_strings(self):
|
||||
self.assertDictEqual(
|
||||
|
@ -1194,6 +1214,9 @@ class TestUtil(unittest.TestCase):
|
|||
on = js_to_json('\'"\\""\'')
|
||||
self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
|
||||
|
||||
on = js_to_json('[new Date("spam"), \'("eggs")\']')
|
||||
self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should match a single string')
|
||||
|
||||
def test_js_to_json_malformed(self):
|
||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||
|
@ -1205,6 +1228,14 @@ class TestUtil(unittest.TestCase):
|
|||
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
|
||||
self.assertEqual(js_to_json('`${name}`', {}), '"name"')
|
||||
|
||||
def test_js_to_json_common_constructors(self):
    """``js_to_json`` output for Map/Array/Date constructor calls must parse to the expected JSON value."""
    expectations = (
        ('new Map([["a", 5]])', {'a': 5}),
        ('Array(5, 10)', [5, 10]),
        ('new Array(15,5)', [15, 5]),
        ('new Map([Array(5, 10),new Array(15,5)])', {'5': 10, '15': 5}),
        ('new Date("123")', "123"),
        ('new Date(\'2023-10-19\')', "2023-10-19"),
    )
    for js_code, expected in expectations:
        converted = js_to_json(js_code)
        self.assertEqual(json.loads(converted), expected)
|
||||
|
||||
def test_extract_attributes(self):
|
||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||
|
@ -1840,6 +1871,8 @@ Line 1
|
|||
def test_clean_podcast_url(self):
|
||||
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|
||||
self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
|
||||
self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')
|
||||
|
||||
def test_LazyList(self):
|
||||
it = list(range(10))
|
||||
|
@ -2327,6 +2360,61 @@ Line 1
|
|||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||
msg='function on a `re.Match` should give group name as well')
|
||||
|
||||
def test_http_header_dict(self):
    """Exercise HTTPHeaderDict: key/value normalisation, update, copy, merge order, clear and delete."""
    hdrs = HTTPHeaderDict()

    # Each assignment targets the same header under a different key casing / value type
    for key, value, stored in (
        ('ytdl-test', b'0', '0'),
        ('ytdl-test', 1, '1'),
        ('Ytdl-test', '2', '2'),
    ):
        hdrs[key] = value
        self.assertEqual(list(hdrs.items()), [('Ytdl-Test', stored)])

    self.assertTrue('ytDl-Test' in hdrs)
    self.assertEqual(str(hdrs), str(dict(hdrs)))
    self.assertEqual(repr(hdrs), str(dict(hdrs)))

    hdrs.update({'X-dlp': 'data'})
    expected_pairs = {('Ytdl-Test', '2'), ('X-Dlp', 'data')}
    self.assertEqual(set(hdrs.items()), expected_pairs)
    self.assertEqual(dict(hdrs), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
    self.assertEqual(len(hdrs), 2)
    self.assertEqual(hdrs.copy(), hdrs)

    merged = HTTPHeaderDict({'X-dlp': 'data3'}, **hdrs, **{'X-dlp': 'data2'})
    self.assertEqual(set(merged.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
    self.assertEqual(len(merged), 2)
    merged.clear()
    self.assertEqual(len(merged), 0)

    # ensure we prefer latter headers
    later_wins = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
    self.assertEqual(set(later_wins.items()), {('Ytdl-Test', '2')})
    del later_wins['ytdl-tesT']
    self.assertEqual(dict(later_wins), {})

    with_semicolon = HTTPHeaderDict({'ytdl-test': 'data;'})
    self.assertEqual(set(with_semicolon.items()), {('Ytdl-Test', 'data;')})
|
||||
|
||||
def test_extract_basic_auth(self):
    """``extract_basic_auth`` returns ``(url, auth_header)`` for each listed URL."""
    expectations = (
        ('http://:foo.bar', ('http://:foo.bar', None)),
        ('http://foo.bar', ('http://foo.bar', None)),
        ('http://@foo.bar', ('http://foo.bar', 'Basic Og==')),
        ('http://:pass@foo.bar', ('http://foo.bar', 'Basic OnBhc3M=')),
        ('http://user:@foo.bar', ('http://foo.bar', 'Basic dXNlcjo=')),
        ('http://user:pass@foo.bar', ('http://foo.bar', 'Basic dXNlcjpwYXNz')),
    )
    for url, expected in expectations:
        assert extract_basic_auth(url) == expected
|
||||
|
||||
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
def test_Popen_windows_escaping(self):
    """Run ``echo`` through ``Popen.run(..., shell=True)`` and compare the echoed text."""
    def run_shell(args):
        # Capture both streams; the command must produce no stderr and a falsy error value
        pipes = {'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE}
        output, err_output, error = Popen.run(args, text=True, shell=True, **pipes)
        assert not err_output
        assert not error
        return output

    # Test escaping
    escaped = run_shell(['echo', 'test"&'])
    assert escaped == '"test""&"\n'
    # Test if delayed expansion is disabled
    for command in (['echo', '^!'], 'echo "^!"'):
        assert run_shell(command) == '"^!"\n'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import datetime
|
||||
import errno
|
||||
import fileinput
|
||||
import functools
|
||||
import http.cookiejar
|
||||
import io
|
||||
import itertools
|
||||
import json
|
||||
|
@ -23,20 +24,25 @@ import traceback
|
|||
import unicodedata
|
||||
|
||||
from .cache import Cache
|
||||
from .compat import urllib # isort: split
|
||||
from .compat import compat_os_name, compat_shlex_quote
|
||||
from .cookies import load_cookies
|
||||
from .downloader import (
|
||||
DashSegmentsFD,
|
||||
FFmpegFD,
|
||||
get_suitable_downloader,
|
||||
shorten_protocol_name,
|
||||
)
|
||||
from .compat import functools, urllib # isort: split
|
||||
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
|
||||
from .cookies import LenientSimpleCookie, load_cookies
|
||||
from .downloader import FFmpegFD, DashSegmentsFD, get_suitable_downloader, shorten_protocol_name
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
from .extractor import gen_extractor_classes, get_info_extractor
|
||||
from .extractor.common import UnsupportedURLIE
|
||||
from .extractor.openload import PhantomJSwrapper
|
||||
from .minicurses import format_text
|
||||
from .networking import HEADRequest, Request, RequestDirector
|
||||
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
|
||||
from .networking.exceptions import (
|
||||
HTTPError,
|
||||
NoSupportingHandlers,
|
||||
RequestError,
|
||||
SSLError,
|
||||
_CompatHTTPError,
|
||||
network_exceptions,
|
||||
)
|
||||
from .plugins import directories as plugin_directories
|
||||
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
|
||||
from .postprocessor import (
|
||||
|
@ -54,7 +60,7 @@ from .postprocessor import (
|
|||
get_postprocessor,
|
||||
)
|
||||
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
|
||||
from .update import REPOSITORY, current_git_head, detect_variant
|
||||
from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant
|
||||
from .utils import (
|
||||
DEFAULT_OUTTMPL,
|
||||
IDENTITY,
|
||||
|
@ -75,13 +81,11 @@ from .utils import (
|
|||
ExtractorError,
|
||||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
HEADRequest,
|
||||
ISO3166Utils,
|
||||
LazyList,
|
||||
MaxDownloadsReached,
|
||||
Namespace,
|
||||
PagedList,
|
||||
PerRequestProxyHandler,
|
||||
PlaylistEntries,
|
||||
Popen,
|
||||
PostProcessingError,
|
||||
|
@ -90,9 +94,6 @@ from .utils import (
|
|||
SameFileError,
|
||||
UnavailableVideoError,
|
||||
UserNotLive,
|
||||
YoutubeDLCookieProcessor,
|
||||
YoutubeDLHandler,
|
||||
YoutubeDLRedirectHandler,
|
||||
age_restricted,
|
||||
args_to_str,
|
||||
bug_reports_message,
|
||||
|
@ -105,6 +106,7 @@ from .utils import (
|
|||
error_to_compat_str,
|
||||
escapeHTML,
|
||||
expand_path,
|
||||
extract_basic_auth,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_bytes,
|
||||
|
@ -120,9 +122,6 @@ from .utils import (
|
|||
locked_file,
|
||||
make_archive_id,
|
||||
make_dir,
|
||||
make_HTTPS_handler,
|
||||
merge_headers,
|
||||
network_exceptions,
|
||||
number_of_digits,
|
||||
orderedSet,
|
||||
orderedSet_from_options,
|
||||
|
@ -135,8 +134,6 @@ from .utils import (
|
|||
sanitize_filename,
|
||||
sanitize_path,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
std_headers,
|
||||
str_or_none,
|
||||
strftime_or_none,
|
||||
subtitles_filename,
|
||||
|
@ -154,6 +151,13 @@ from .utils import (
|
|||
write_json_file,
|
||||
write_string,
|
||||
)
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import (
|
||||
HTTPHeaderDict,
|
||||
clean_headers,
|
||||
clean_proxies,
|
||||
std_headers,
|
||||
)
|
||||
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
|
@ -235,9 +239,9 @@ class YoutubeDL:
|
|||
'selected' (check selected formats),
|
||||
or None (check only if requested by extractor)
|
||||
paths: Dictionary of output paths. The allowed keys are 'home'
|
||||
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
|
||||
'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
|
||||
outtmpl: Dictionary of templates for output names. Allowed keys
|
||||
are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
|
||||
are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
|
||||
For compatibility with youtube-dl, a single string can also be used
|
||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
|
@ -252,8 +256,6 @@ class YoutubeDL:
|
|||
overwrites: Overwrite all video and metadata files if True,
|
||||
overwrite only non-video files if None
|
||||
and don't overwrite any file if False
|
||||
For compatibility with youtube-dl,
|
||||
"nooverwrites" may also be used instead
|
||||
playlist_items: Specific indices of playlist to download.
|
||||
playlistrandom: Download playlist items in random order.
|
||||
lazy_playlist: Process playlist entries as they are received.
|
||||
|
@ -420,7 +422,7 @@ class YoutubeDL:
|
|||
asked whether to download the video.
|
||||
- Raise utils.DownloadCancelled(msg) to abort remaining
|
||||
downloads when a video is rejected.
|
||||
match_filter_func in utils.py is one example for this.
|
||||
match_filter_func in utils/_utils.py is one example for this.
|
||||
color: A Dictionary with output stream names as keys
|
||||
and their respective color policy as values.
|
||||
Can also just be a single color policy,
|
||||
|
@ -549,6 +551,7 @@ class YoutubeDL:
|
|||
You can reduce network I/O by disabling it if you don't
|
||||
care about HLS. (only for youtube)
|
||||
no_color: Same as `color='no_color'`
|
||||
no_overwrites: Same as `overwrites=False`
|
||||
"""
|
||||
|
||||
_NUMERIC_FIELDS = {
|
||||
|
@ -568,7 +571,7 @@ class YoutubeDL:
|
|||
'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
|
||||
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
|
||||
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
|
||||
'preference', 'language', 'language_preference', 'quality', 'source_preference',
|
||||
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
|
||||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
|
||||
}
|
||||
|
@ -600,6 +603,7 @@ class YoutubeDL:
|
|||
self._playlist_level = 0
|
||||
self._playlist_urls = set()
|
||||
self.cache = Cache(self)
|
||||
self.__header_cookies = []
|
||||
|
||||
stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
|
||||
self._out_files = Namespace(
|
||||
|
@ -617,7 +621,8 @@ class YoutubeDL:
|
|||
|
||||
if self.params.get('no_color'):
|
||||
if self.params.get('color') is not None:
|
||||
self.report_warning('Overwriting params from "color" with "no_color"')
|
||||
self.params.setdefault('_warnings', []).append(
|
||||
'Overwriting params from "color" with "no_color"')
|
||||
self.params['color'] = 'no_color'
|
||||
|
||||
term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
|
||||
|
@ -627,7 +632,7 @@ class YoutubeDL:
|
|||
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
|
||||
if policy in ('auto', None):
|
||||
return term_allow_color and supports_terminal_sequences(stream)
|
||||
assert policy in ('always', 'never', 'no_color')
|
||||
assert policy in ('always', 'never', 'no_color'), policy
|
||||
return {'always': True, 'never': False}.get(policy, policy)
|
||||
|
||||
self._allow_colors = Namespace(**{
|
||||
|
@ -635,17 +640,9 @@ class YoutubeDL:
|
|||
for name, stream in self._out_files.items_ if name != 'console'
|
||||
})
|
||||
|
||||
# The code is left like this to be reused for future deprecations
|
||||
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
|
||||
current_version = sys.version_info[:2]
|
||||
if current_version < MIN_RECOMMENDED:
|
||||
msg = ('Support for Python version %d.%d has been deprecated. '
|
||||
'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
|
||||
'\n You will no longer receive updates on this version')
|
||||
if current_version < MIN_SUPPORTED:
|
||||
msg = 'Python version %d.%d is no longer supported'
|
||||
self.deprecated_feature(
|
||||
f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
|
||||
system_deprecation = _get_system_deprecation()
|
||||
if system_deprecation:
|
||||
self.deprecated_feature(system_deprecation.replace('\n', '\n '))
|
||||
|
||||
if self.params.get('allow_unplayable_formats'):
|
||||
self.report_warning(
|
||||
|
@ -675,6 +672,11 @@ class YoutubeDL:
|
|||
raise
|
||||
|
||||
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
|
||||
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
|
||||
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
|
||||
self.params['http_headers'].pop('Cookie', None)
|
||||
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
|
||||
|
||||
if auto_init and auto_init != 'no_verbose_header':
|
||||
self.print_debug_header()
|
||||
|
||||
|
@ -745,9 +747,6 @@ class YoutubeDL:
|
|||
else self.params['format'] if callable(self.params['format'])
|
||||
else self.build_format_selector(self.params['format']))
|
||||
|
||||
# Set http_headers defaults according to std_headers
|
||||
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
|
||||
|
||||
hooks = {
|
||||
'post_hooks': self.add_post_hook,
|
||||
'progress_hooks': self.add_progress_hook,
|
||||
|
@ -764,8 +763,6 @@ class YoutubeDL:
|
|||
get_postprocessor(pp_def.pop('key'))(self, **pp_def),
|
||||
when=when)
|
||||
|
||||
self._setup_opener()
|
||||
|
||||
def preload_download_archive(fn):
|
||||
"""Preload the archive, if any is specified"""
|
||||
archive = set()
|
||||
|
@ -941,11 +938,17 @@ class YoutubeDL:
|
|||
self.save_console_title()
|
||||
return self
|
||||
|
||||
def save_cookies(self):
    """Call ``self.cookiejar.save()`` when the 'cookiefile' param is not None; otherwise do nothing."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.restore_console_title()
|
||||
self.close()
|
||||
|
||||
if self.params.get('cookiefile') is not None:
|
||||
self.cookiejar.save(ignore_discard=True, ignore_expires=True)
|
||||
def close(self):
|
||||
self.save_cookies()
|
||||
self._request_director.close()
|
||||
|
||||
def trouble(self, message=None, tb=None, is_error=True):
|
||||
"""Determine action to take when a download problem appears.
|
||||
|
@ -988,6 +991,7 @@ class YoutubeDL:
|
|||
ID='green',
|
||||
DELIM='blue',
|
||||
ERROR='red',
|
||||
BAD_FORMAT='light red',
|
||||
WARNING='yellow',
|
||||
SUPPRESS='light black',
|
||||
)
|
||||
|
@ -1276,28 +1280,27 @@ class YoutubeDL:
|
|||
return outer_mobj.group(0)
|
||||
key = outer_mobj.group('key')
|
||||
mobj = re.match(INTERNAL_FORMAT_RE, key)
|
||||
initial_field = mobj.group('fields') if mobj else ''
|
||||
value, replacement, default = None, None, na
|
||||
value, replacement, default, last_field = None, None, na, ''
|
||||
while mobj:
|
||||
mobj = mobj.groupdict()
|
||||
default = mobj['default'] if mobj['default'] is not None else default
|
||||
value = get_value(mobj)
|
||||
replacement = mobj['replacement']
|
||||
last_field, replacement = mobj['fields'], mobj['replacement']
|
||||
if value is None and mobj['alternate']:
|
||||
mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
|
||||
else:
|
||||
break
|
||||
|
||||
fmt = outer_mobj.group('format')
|
||||
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
|
||||
fmt = f'0{field_size_compat_map[key]:d}d'
|
||||
|
||||
if None not in (value, replacement):
|
||||
try:
|
||||
value = replacement_formatter.format(replacement, value)
|
||||
except ValueError:
|
||||
value, default = None, na
|
||||
|
||||
fmt = outer_mobj.group('format')
|
||||
if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
|
||||
fmt = f'0{field_size_compat_map[last_field]:d}d'
|
||||
|
||||
flags = outer_mobj.group('conversion') or ''
|
||||
str_fmt = f'{fmt[:-1]}s'
|
||||
if value is None:
|
||||
|
@ -1327,7 +1330,7 @@ class YoutubeDL:
|
|||
value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
|
||||
factor=1024 if '#' in flags else 1000)
|
||||
elif fmt[-1] == 'S': # filename sanitization
|
||||
value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
|
||||
value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
|
||||
elif fmt[-1] == 'c':
|
||||
if value:
|
||||
value = str(value)[0]
|
||||
|
@ -1346,7 +1349,7 @@ class YoutubeDL:
|
|||
elif fmt[-1] == 'a':
|
||||
value, fmt = ascii(value), str_fmt
|
||||
if fmt[-1] in 'csra':
|
||||
value = sanitizer(initial_field, value)
|
||||
value = sanitizer(last_field, value)
|
||||
|
||||
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||
TMPL_DICT[key] = value
|
||||
|
@ -1481,7 +1484,10 @@ class YoutubeDL:
|
|||
return ret
|
||||
|
||||
if self.in_download_archive(info_dict):
|
||||
reason = '%s has already been recorded in the archive' % video_title
|
||||
reason = ''.join((
|
||||
format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
|
||||
format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
|
||||
'has already been recorded in the archive'))
|
||||
break_opt, break_err = 'break_on_existing', ExistingVideoReached
|
||||
else:
|
||||
try:
|
||||
|
@ -1542,7 +1548,8 @@ class YoutubeDL:
|
|||
|
||||
temp_id = ie.get_temp_id(url)
|
||||
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
|
||||
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
|
||||
self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
|
||||
'has already been recorded in the archive')
|
||||
if self.params.get('break_on_existing', False):
|
||||
raise ExistingVideoReached()
|
||||
break
|
||||
|
@ -1630,8 +1637,67 @@ class YoutubeDL:
|
|||
self.to_screen('')
|
||||
raise
|
||||
|
||||
def _load_cookies(self, data, *, autoscope=True):
    """Load the cookies contained in a `Cookie` header into the cookiejar

    Exists to work around the security vulnerability of sending header cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header (string) to parse
    @param autoscope    If `False`, the data is read with Set-Cookie syntax and
                        a cookie lacking a domain is reported as an error
                        If `True`, cookies lacking a domain are kept aside so that
                        they can later be added to the jar with a limited scope
                        If a URL, cookies lacking a domain are added to the jar
                        scoped to the domain of that URL
    @raises ValueError  If `autoscope` is truthy and a cookie carries any attribute
    """
    for morsel in LenientSimpleCookie(data).values():
        # A plain `Cookie` header cannot carry attributes (domain, path, ...)
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        expiry = morsel.get('expires')
        if expiry == '':  # 0 is valid
            expiry = None
        path = morsel.get('path')
        prepared_cookie = http.cookiejar.Cookie(
            morsel.get('version') or 0,  # version
            morsel.key,  # name
            morsel.value,  # value
            None,  # port
            False,  # port_specified
            domain,  # domain
            True,  # domain_specified
            True,  # domain_initial_dot
            path or '',  # path
            bool(path),  # path_specified
            morsel.get('secure') or False,  # secure
            expiry,  # expires
            False,  # discard
            None,  # comment
            None,  # comment_url
            {})  # rest

        if domain:
            # Already scoped; can go into the jar as is
            self.cookiejar.set_cookie(prepared_cookie)
            continue

        if autoscope is True:
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # `autoscope` is the URL whose domain the cookie gets scoped to
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
|
||||
|
||||
def _apply_header_cookies(self, url, cookies=None):
    """Scope stray header cookies to the host of `url` and add them to the cookiejar

    Copies of the given cookies (or, when none are given, of the stored
    header cookies) get their domain set to the hostname of `url`.
    This is not ideal, but it reduces the risk of such cookies being sent
    to an unintended destination while mostly keeping compatibility.
    Nothing is done if `url` has no hostname.
    """
    hostname = urllib.parse.urlparse(url).hostname
    if not hostname:
        return

    scoped_domain = f'.{hostname}'
    for unscoped in cookies or self.__header_cookies:
        # Work on a copy so that the source cookie stays unscoped
        scoped = copy.copy(unscoped)
        scoped.domain = scoped_domain
        self.cookiejar.set_cookie(scoped)
|
||||
|
||||
@_handle_extraction_exceptions
|
||||
def __extract_info(self, url, ie, download, extra_info, process):
|
||||
self._apply_header_cookies(url)
|
||||
|
||||
try:
|
||||
ie_result = ie.extract(url)
|
||||
except UserNotLive as e:
|
||||
|
@ -2091,8 +2157,6 @@ class YoutubeDL:
|
|||
allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
|
||||
'video': self.params.get('allow_multiple_video_streams', False)}
|
||||
|
||||
check_formats = self.params.get('check_formats') == 'selected'
|
||||
|
||||
def _parse_filter(tokens):
|
||||
filter_parts = []
|
||||
for type, string_, start, _, _ in tokens:
|
||||
|
@ -2265,10 +2329,19 @@ class YoutubeDL:
|
|||
return new_dict
|
||||
|
||||
def _check_formats(formats):
|
||||
if not check_formats:
|
||||
if self.params.get('check_formats') == 'selected':
|
||||
yield from self._check_formats(formats)
|
||||
return
|
||||
elif (self.params.get('check_formats') is not None
|
||||
or self.params.get('allow_unplayable_formats')):
|
||||
yield from formats
|
||||
return
|
||||
yield from self._check_formats(formats)
|
||||
|
||||
for f in formats:
|
||||
if f.get('has_drm'):
|
||||
yield from self._check_formats([f])
|
||||
else:
|
||||
yield f
|
||||
|
||||
def _build_selector_function(selector):
|
||||
if isinstance(selector, list): # ,
|
||||
|
@ -2407,14 +2480,34 @@ class YoutubeDL:
|
|||
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
|
||||
return _build_selector_function(parsed_selector)
|
||||
|
||||
def _calc_headers(self, info_dict):
|
||||
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
|
||||
if 'Youtubedl-No-Compression' in res: # deprecated
|
||||
res.pop('Youtubedl-No-Compression', None)
|
||||
res['Accept-Encoding'] = 'identity'
|
||||
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
|
||||
def _calc_headers(self, info_dict, load_cookies=False):
|
||||
res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
|
||||
clean_headers(res)
|
||||
|
||||
if load_cookies: # For --load-info-json
|
||||
self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
|
||||
self._load_cookies(info_dict.get('cookies'), autoscope=False)
|
||||
# The `Cookie` header is removed to prevent leaks and unscoped cookies.
|
||||
# See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
|
||||
res.pop('Cookie', None)
|
||||
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
|
||||
if cookies:
|
||||
res['Cookie'] = cookies
|
||||
encoder = LenientSimpleCookie()
|
||||
values = []
|
||||
for cookie in cookies:
|
||||
_, value = encoder.value_encode(cookie.value)
|
||||
values.append(f'{cookie.name}={value}')
|
||||
if cookie.domain:
|
||||
values.append(f'Domain={cookie.domain}')
|
||||
if cookie.path:
|
||||
values.append(f'Path={cookie.path}')
|
||||
if cookie.secure:
|
||||
values.append('Secure')
|
||||
if cookie.expires:
|
||||
values.append(f'Expires={cookie.expires}')
|
||||
if cookie.version:
|
||||
values.append(f'Version={cookie.version}')
|
||||
info_dict['cookies'] = '; '.join(values)
|
||||
|
||||
if 'X-Forwarded-For' not in res:
|
||||
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
|
||||
|
@ -2490,7 +2583,7 @@ class YoutubeDL:
|
|||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
with contextlib.suppress(ValueError, OverflowError, OSError):
|
||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||
upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
|
||||
live_keys = ('is_live', 'was_live')
|
||||
|
@ -2620,10 +2713,10 @@ class YoutubeDL:
|
|||
if field_preference:
|
||||
info_dict['_format_sort_fields'] = field_preference
|
||||
|
||||
# or None ensures --clean-infojson removes it
|
||||
info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
|
||||
info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
|
||||
f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
|
||||
if not self.params.get('allow_unplayable_formats'):
|
||||
formats = [f for f in formats if not f.get('has_drm')]
|
||||
formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
|
||||
|
||||
if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
|
||||
self.report_warning(
|
||||
|
@ -2675,7 +2768,12 @@ class YoutubeDL:
|
|||
and info_dict.get('duration') and format.get('tbr')
|
||||
and not format.get('filesize') and not format.get('filesize_approx')):
|
||||
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
|
||||
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
|
||||
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
|
||||
|
||||
# Safeguard against old/insecure infojson when using --load-info-json
|
||||
if info_dict.get('http_headers'):
|
||||
info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
|
||||
info_dict['http_headers'].pop('Cookie', None)
|
||||
|
||||
# This is copied to http_headers by the above _calc_headers and can now be removed
|
||||
if '__x_forwarded_for_ip' in info_dict:
|
||||
|
@ -2772,11 +2870,8 @@ class YoutubeDL:
|
|||
formats_to_download = list(format_selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (
|
||||
# All formats are video-only or
|
||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
|
||||
# all formats are audio-only
|
||||
or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
if interactive_format_selection and not formats_to_download:
|
||||
self.report_error('Requested format is not available', tb=False, is_error=False)
|
||||
|
@ -3186,7 +3281,8 @@ class YoutubeDL:
|
|||
fd, success = None, True
|
||||
if info_dict.get('protocol') or info_dict.get('url'):
|
||||
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
|
||||
if not (fd is FFmpegFD or fd is DashSegmentsFD) and 'no-direct-merge' not in self.params['compat_opts'] and (info_dict.get('section_start') or info_dict.get('section_end')):
|
||||
if not (fd is FFmpegFD or fd is DashSegmentsFD) and 'no-direct-merge' not in self.params['compat_opts'] and (
|
||||
info_dict.get('section_start') or info_dict.get('section_end')):
|
||||
msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
|
||||
else 'You have requested downloading the video partially, but ffmpeg is not installed')
|
||||
self.report_error(f'{msg}. Aborting')
|
||||
|
@ -3349,7 +3445,8 @@ class YoutubeDL:
|
|||
) for pp in self._pps['post_process'])
|
||||
|
||||
if not postprocessed_by_ffmpeg:
|
||||
ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
|
||||
ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
|
||||
and info_dict.get('container') == 'm4a_dash',
|
||||
'writing DASH m4a. Only some players support this container',
|
||||
FFmpegFixupM4aPP)
|
||||
ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
|
||||
|
@ -3692,7 +3789,7 @@ class YoutubeDL:
|
|||
|
||||
def simplified_codec(f, field):
|
||||
assert field in ('acodec', 'vcodec')
|
||||
codec = f.get(field, 'unknown')
|
||||
codec = f.get(field)
|
||||
if not codec:
|
||||
return 'unknown'
|
||||
elif codec != 'none':
|
||||
|
@ -3727,14 +3824,13 @@ class YoutubeDL:
|
|||
simplified_codec(f, 'acodec'),
|
||||
format_field(f, 'abr', '\t%dk', func=round),
|
||||
format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
|
||||
join_nonempty(
|
||||
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
|
||||
self._format_out('DRM', 'light red') if f.get('has_drm') else None,
|
||||
format_field(f, 'language', '[%s]'),
|
||||
join_nonempty(format_field(f, 'format_note'),
|
||||
format_field(f, 'container', ignore=(None, f.get('ext'))),
|
||||
delim=', '),
|
||||
delim=' '),
|
||||
join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
|
||||
self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
|
||||
(self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
|
||||
else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
|
||||
format_field(f, 'format_note'),
|
||||
format_field(f, 'container', ignore=(None, f.get('ext'))),
|
||||
delim=', '), delim=' '),
|
||||
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
|
||||
header_line = self._list_format_headers(
|
||||
'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
|
||||
|
@ -3783,12 +3879,6 @@ class YoutubeDL:
|
|||
def list_subtitles(self, video_id, subtitles, name='subtitles'):
|
||||
self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
if isinstance(req, str):
|
||||
req = sanitized_Request(req)
|
||||
return self._opener.open(req, timeout=self._socket_timeout)
|
||||
|
||||
def print_debug_header(self):
|
||||
if not self.params.get('verbose'):
|
||||
return
|
||||
|
@ -3877,13 +3967,8 @@ class YoutubeDL:
|
|||
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
|
||||
})) or 'none'))
|
||||
|
||||
self._setup_opener()
|
||||
proxy_map = {}
|
||||
for handler in self._opener.handlers:
|
||||
if hasattr(handler, 'proxies'):
|
||||
proxy_map.update(handler.proxies)
|
||||
write_debug(f'Proxy map: {proxy_map}')
|
||||
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
# write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['%s%s' % (
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
|
@ -3911,58 +3996,110 @@ class YoutubeDL:
|
|||
'See https://yt-dl.org/update if you need help updating.' %
|
||||
latest_version)
|
||||
|
||||
def _setup_opener(self):
|
||||
if hasattr(self, '_opener'):
|
||||
return
|
||||
timeout_val = self.params.get('socket_timeout')
|
||||
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
|
||||
|
||||
opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
|
||||
opts_cookiefile = self.params.get('cookiefile')
|
||||
@functools.cached_property
|
||||
def proxies(self):
|
||||
"""Global proxy configuration"""
|
||||
opts_proxy = self.params.get('proxy')
|
||||
|
||||
self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
|
||||
|
||||
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
|
||||
if opts_proxy is not None:
|
||||
if opts_proxy == '':
|
||||
proxies = {}
|
||||
else:
|
||||
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||
opts_proxy = '__noproxy__'
|
||||
proxies = {'all': opts_proxy}
|
||||
else:
|
||||
proxies = urllib.request.getproxies()
|
||||
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
|
||||
# compat. Set HTTPS_PROXY to __noproxy__ to revert
|
||||
if 'http' in proxies and 'https' not in proxies:
|
||||
proxies['https'] = proxies['http']
|
||||
proxy_handler = PerRequestProxyHandler(proxies)
|
||||
|
||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||
redirect_handler = YoutubeDLRedirectHandler()
|
||||
data_handler = urllib.request.DataHandler()
|
||||
return proxies
|
||||
|
||||
# When passing our own FileHandler instance, build_opener won't add the
|
||||
# default FileHandler and allows us to disable the file protocol, which
|
||||
# can be used for malicious purposes (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/8227)
|
||||
file_handler = urllib.request.FileHandler()
|
||||
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance"""
    # Built from the `cookiefile` and `cookiesfrombrowser` params on first access
    cookie_file = self.params.get('cookiefile')
    browser_specification = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_specification, self)
|
||||
|
||||
if not self.params.get('enable_file_urls'):
|
||||
def file_open(*args, **kwargs):
|
||||
raise urllib.error.URLError(
|
||||
'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
|
||||
'Use --enable-file-urls to enable at your own risk.')
|
||||
file_handler.file_open = file_open
|
||||
@property
def _opener(self):
    """
    Get a urllib OpenerDirector from the Urllib handler (deprecated).

    Emits a deprecation warning on every access.
    """
    self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
    urllib_handler = self._request_director.handlers['Urllib']
    return urllib_handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
|
||||
|
||||
opener = urllib.request.build_opener(
|
||||
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
if isinstance(req, str):
|
||||
req = Request(req)
|
||||
elif isinstance(req, urllib.request.Request):
|
||||
self.deprecation_warning(
|
||||
'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
|
||||
'Use yt_dlp.networking.common.Request instead.')
|
||||
req = urllib_req_to_req(req)
|
||||
assert isinstance(req, Request)
|
||||
|
||||
# Delete the default user-agent header, which would otherwise apply in
|
||||
# cases where our custom HTTP handler doesn't come into play
|
||||
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
|
||||
opener.addheaders = []
|
||||
self._opener = opener
|
||||
# compat: Assume user:pass url params are basic auth
|
||||
url, basic_auth_header = extract_basic_auth(req.url)
|
||||
if basic_auth_header:
|
||||
req.headers['Authorization'] = basic_auth_header
|
||||
req.url = sanitize_url(url)
|
||||
|
||||
clean_proxies(proxies=req.proxies, headers=req.headers)
|
||||
clean_headers(req.headers)
|
||||
|
||||
try:
|
||||
return self._request_director.send(req)
|
||||
except NoSupportingHandlers as e:
|
||||
for ue in e.unsupported_errors:
|
||||
if not (ue.handler and ue.msg):
|
||||
continue
|
||||
if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
|
||||
raise RequestError(
|
||||
'file:// URLs are disabled by default in yt-dlp for security reasons. '
|
||||
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
|
||||
raise
|
||||
except SSLError as e:
|
||||
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
|
||||
raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
|
||||
elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
|
||||
raise RequestError(
|
||||
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
|
||||
'Try using --legacy-server-connect', cause=e) from e
|
||||
raise
|
||||
except HTTPError as e: # TODO: Remove in a future release
|
||||
raise _CompatHTTPError(e) from e
|
||||
|
||||
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector and register an instance of each given handler

    @param handlers     Iterable of request handler classes; each one is instantiated
                        with the logger, headers, cookiejar, proxies and the
                        networking-related options taken from `self.params`
    @param preferences  Optional iterable of preferences added to the director
    @returns            The configured RequestDirector
    """
    logger = _YDLLogger(self)

    # The cleaning helpers are applied to copies of the global headers/proxies
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_class in handlers:
        request_handler = handler_class(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            # Maps handler keyword names to the corresponding param names
            **traverse_obj(self.params, {
                'verbose': 'debug_printtraffic',
                'source_address': 'source_address',
                'timeout': 'socket_timeout',
                'legacy_ssl_support': 'legacyserverconnect',
                'enable_file_urls': 'enable_file_urls',
                'client_cert': {
                    'client_certificate': 'client_certificate',
                    'client_certificate_key': 'client_certificate_key',
                    'client_certificate_password': 'client_certificate_password',
                },
            }),
        )
        director.add_handler(request_handler)

    director.preferences.update(preferences or [])
    return director
|
||||
|
||||
def encode(self, s):
|
||||
if isinstance(s, bytes):
|
||||
|
@ -4115,14 +4252,14 @@ class YoutubeDL:
|
|||
else:
|
||||
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
|
||||
try:
|
||||
uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
|
||||
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
|
||||
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
||||
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
ret.append((thumb_filename, thumb_filename_final))
|
||||
t['filepath'] = thumb_filename
|
||||
except network_exceptions as err:
|
||||
if isinstance(err, urllib.error.HTTPError) and err.code == 404:
|
||||
if isinstance(err, HTTPError) and err.status == 404:
|
||||
self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
|
||||
else:
|
||||
self.report_warning(f'Unable to download {thumb_display_id}: {err}')
|
||||
|
|
|
@ -58,11 +58,11 @@ from .utils import (
|
|||
read_stdin,
|
||||
render_table,
|
||||
setproctitle,
|
||||
std_headers,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
write_string,
|
||||
)
|
||||
from .utils.networking import std_headers
|
||||
from .YoutubeDL import YoutubeDL
|
||||
|
||||
_IN_CLI = False
|
||||
|
|
|
@ -18,7 +18,8 @@ def pycryptodome_module():
|
|||
|
||||
|
||||
def get_hidden_imports():
|
||||
yield 'yt_dlp.compat._legacy'
|
||||
yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
|
||||
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
|
||||
yield pycryptodome_module()
|
||||
yield from collect_submodules('websockets')
|
||||
# These are auto-detected, but explicitly add them just in case
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
import os
|
||||
import sys
|
||||
import warnings
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from ._deprecated import * # noqa: F401, F403
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
# XXX: Implement this the same way as other DeprecationWarnings without circular import
|
||||
passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
|
||||
passthrough_module(__name__, '._deprecated')
|
||||
del passthrough_module
|
||||
|
||||
|
||||
# HTMLParseError has been deprecated in Python 3.3 and removed in
|
||||
|
@ -33,7 +30,7 @@ compat_os_name = os._name if os.name == 'java' else os.name
|
|||
if compat_os_name == 'nt':
|
||||
def compat_shlex_quote(s):
|
||||
import re
|
||||
return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
|
||||
return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
|
||||
else:
|
||||
from shlex import quote as compat_shlex_quote # noqa: F401
|
||||
|
||||
|
@ -70,3 +67,13 @@ if compat_os_name in ('nt', 'ce'):
|
|||
return userhome + path[i:]
|
||||
else:
|
||||
compat_expanduser = os.path.expanduser
|
||||
|
||||
|
||||
def urllib_req_to_req(urllib_request):
    """Convert urllib Request to a networking Request

    URL, data, method and both the regular and the unredirected headers are
    carried over. The timeout is passed as an extension only when the urllib
    request has a `timeout` attribute.
    """
    from ..networking import Request
    from ..utils.networking import HTTPHeaderDict

    extensions = None
    if hasattr(urllib_request, 'timeout'):
        extensions = {'timeout': urllib_request.timeout}

    return Request(
        urllib_request.get_full_url(),
        data=urllib_request.data,
        method=urllib_request.get_method(),
        headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
        extensions=extensions)
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
"""Deprecated - New code should avoid these"""
|
||||
import warnings
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
# XXX: Implement this the same way as other DeprecationWarnings without circular import
|
||||
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
|
||||
del passthrough_module
|
||||
|
||||
import base64
|
||||
import urllib.error
|
||||
|
@ -8,7 +16,6 @@ compat_str = str
|
|||
|
||||
compat_b64decode = base64.b64decode
|
||||
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
|
|
|
@ -16,12 +16,12 @@ import shlex
|
|||
import shutil
|
||||
import socket
|
||||
import struct
|
||||
import subprocess
|
||||
import tokenize
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as etree
|
||||
from subprocess import DEVNULL
|
||||
|
||||
# isort: split
|
||||
import asyncio # noqa: F401
|
||||
|
@ -70,6 +70,7 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
|
|||
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||
compat_http_client = http.client
|
||||
compat_http_server = http.server
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_itertools_count = itertools.count
|
||||
|
@ -84,7 +85,7 @@ compat_socket_create_connection = socket.create_connection
|
|||
compat_Struct = struct.Struct
|
||||
compat_struct_pack = struct.pack
|
||||
compat_struct_unpack = struct.unpack
|
||||
compat_subprocess_get_DEVNULL = lambda: DEVNULL
|
||||
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
|
||||
compat_tokenize_tokenize = tokenize.tokenize
|
||||
compat_urllib_error = urllib.error
|
||||
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||
|
|
|
@ -15,7 +15,7 @@ def get_package_info(module):
|
|||
name=getattr(module, '_yt_dlp__identifier', module.__name__),
|
||||
version=str(next(filter(None, (
|
||||
getattr(module, attr, None)
|
||||
for attr in ('__version__', 'version_string', 'version')
|
||||
for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version')
|
||||
)), None)))
|
||||
|
||||
|
||||
|
|
13
yt_dlp/compat/types.py
Normal file
13
yt_dlp/compat/types.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# flake8: noqa: F405
|
||||
from types import * # noqa: F403
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'types')
|
||||
del passthrough_module
|
||||
|
||||
try:
|
||||
# NB: pypy has builtin NoneType, so checking NameError won't work
|
||||
from types import NoneType # >= 3.10
|
||||
except ImportError:
|
||||
NoneType = type(None)
|
|
@ -1,6 +1,9 @@
|
|||
# flake8: noqa: F405
|
||||
from urllib import * # noqa: F403
|
||||
|
||||
del request # noqa: F821
|
||||
from . import request # noqa: F401
|
||||
|
||||
from ..compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'urllib')
|
||||
|
|
|
@ -33,7 +33,6 @@ from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
|||
from .utils import (
|
||||
Popen,
|
||||
error_to_str,
|
||||
escape_url,
|
||||
expand_path,
|
||||
is_path_like,
|
||||
sanitize_url,
|
||||
|
@ -41,30 +40,16 @@ from .utils import (
|
|||
try_call,
|
||||
write_string,
|
||||
)
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
class YDLLogger:
|
||||
def __init__(self, ydl=None):
|
||||
self._ydl = ydl
|
||||
|
||||
def debug(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.write_debug(message)
|
||||
|
||||
def info(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.to_screen(f'[Cookies] {message}')
|
||||
|
||||
def warning(self, message, only_once=False):
|
||||
if self._ydl:
|
||||
self._ydl.report_warning(message, only_once)
|
||||
|
||||
def error(self, message):
|
||||
if self._ydl:
|
||||
self._ydl.report_error(message)
|
||||
class YDLLogger(_YDLLogger):
|
||||
def warning(self, message, only_once=False): # compat
|
||||
return super().warning(message, once=only_once)
|
||||
|
||||
class ProgressBar(MultilinePrinter):
|
||||
_DELAY, _timer = 0.1, 0
|
||||
|
@ -112,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):
|
|||
|
||||
jar = YoutubeDLCookieJar(cookie_file)
|
||||
if not is_filename or os.access(cookie_file, os.R_OK):
|
||||
jar.load(ignore_discard=True, ignore_expires=True)
|
||||
jar.load()
|
||||
cookie_jars.append(jar)
|
||||
|
||||
return _merge_cookie_jars(cookie_jars)
|
||||
|
@ -153,7 +138,7 @@ def _extract_firefox_cookies(profile, container, logger):
|
|||
containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
|
||||
if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
|
||||
raise FileNotFoundError(f'could not read containers.json in {search_root}')
|
||||
with open(containers_path) as containers:
|
||||
with open(containers_path, encoding='utf8') as containers:
|
||||
identities = json.load(containers).get('identities', [])
|
||||
container_id = next((context.get('userContextId') for context in identities if container in (
|
||||
context.get('name'),
|
||||
|
@ -1228,7 +1213,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
|||
file.truncate(0)
|
||||
yield file
|
||||
|
||||
def _really_save(self, f, ignore_discard=False, ignore_expires=False):
|
||||
def _really_save(self, f, ignore_discard, ignore_expires):
|
||||
now = time.time()
|
||||
for cookie in self:
|
||||
if (not ignore_discard and cookie.discard
|
||||
|
@ -1249,7 +1234,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
|||
name, value
|
||||
)))
|
||||
|
||||
def save(self, filename=None, *args, **kwargs):
|
||||
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""
|
||||
Save cookies to a file.
|
||||
Code is taken from CPython 3.6
|
||||
|
@ -1268,9 +1253,9 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
|||
|
||||
with self.open(filename, write=True) as f:
|
||||
f.write(self._HEADER)
|
||||
self._really_save(f, *args, **kwargs)
|
||||
self._really_save(f, ignore_discard, ignore_expires)
|
||||
|
||||
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
|
||||
def load(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""Load cookies from a file."""
|
||||
if filename is None:
|
||||
if self.filename is not None:
|
||||
|
@ -1323,10 +1308,17 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
|||
|
||||
def get_cookie_header(self, url):
|
||||
"""Generate a Cookie HTTP header for a given url"""
|
||||
cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
|
||||
cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
|
||||
self.add_cookie_header(cookie_req)
|
||||
return cookie_req.get_header('Cookie')
|
||||
|
||||
def get_cookies_for_url(self, url):
    """Return the Cookie objects that apply to a request for the given url"""
    # The policy's `_now` attribute has to be refreshed before `_cookies_for_request` is called
    # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
    now = int(time.time())
    self._policy._now = now
    self._now = now

    # Sanitize and normalize the url before wrapping it in a stdlib request object
    request = urllib.request.Request(normalize_url(sanitize_url(url)))
    return self._cookies_for_request(request)
|
||||
|
||||
def clear(self, *args, **kwargs):
    """Clear cookies via the parent implementation, swallowing any KeyError it raises"""
    try:
        return super().clear(*args, **kwargs)
    except KeyError:
        # Same outcome as a suppressed exception: fall through and return None
        return None
|
||||
|
|
|
@ -43,6 +43,8 @@ except Exception as _err:
|
|||
|
||||
try:
|
||||
import sqlite3
|
||||
# We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152
|
||||
sqlite3._yt_dlp__version = sqlite3.sqlite_version
|
||||
except ImportError:
|
||||
# although sqlite3 is part of the standard library, it is possible to compile python without
|
||||
# sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544
|
||||
|
|
|
@ -255,7 +255,8 @@ class FileDownloader:
|
|||
|
||||
@wrap_file_access('remove')
|
||||
def try_remove(self, filename):
|
||||
os.remove(filename)
|
||||
if os.path.isfile(filename):
|
||||
os.remove(filename)
|
||||
|
||||
@wrap_file_access('rename')
|
||||
def try_rename(self, old_filename, new_filename):
|
||||
|
@ -418,7 +419,6 @@ class FileDownloader:
|
|||
"""Download to a filename using the info from info_dict
|
||||
Return True on success and False otherwise
|
||||
"""
|
||||
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
import enum
|
||||
import json
|
||||
import os.path
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import functools
|
||||
from ..networking import Request
|
||||
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
|
||||
from ..utils import (
|
||||
Popen,
|
||||
|
@ -24,7 +26,6 @@ from ..utils import (
|
|||
encodeFilename,
|
||||
find_available_port,
|
||||
remove_end,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
@ -42,6 +43,7 @@ class ExternalFD(FragmentFD):
|
|||
def real_download(self, filename, info_dict):
|
||||
self.report_destination(filename)
|
||||
tmpfilename = self.temp_name(filename)
|
||||
self._cookies_tempfile = None
|
||||
|
||||
try:
|
||||
started = time.time()
|
||||
|
@ -54,6 +56,9 @@ class ExternalFD(FragmentFD):
|
|||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
finally:
|
||||
if self._cookies_tempfile:
|
||||
self.try_remove(self._cookies_tempfile)
|
||||
|
||||
if retval == 0:
|
||||
status = {
|
||||
|
@ -125,6 +130,16 @@ class ExternalFD(FragmentFD):
|
|||
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
|
||||
keys, *args, **kwargs)
|
||||
|
||||
def _write_cookies(self):
|
||||
if not self.ydl.cookiejar.filename:
|
||||
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
|
||||
tmp_cookies.close()
|
||||
self._cookies_tempfile = tmp_cookies.name
|
||||
self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
|
||||
# real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
|
||||
self.ydl.cookiejar.save(self._cookies_tempfile)
|
||||
return self.ydl.cookiejar.filename or self._cookies_tempfile
|
||||
|
||||
def _call_downloader(self, tmpfilename, info_dict):
|
||||
""" Either overwrite this or implement _make_cmd """
|
||||
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
|
||||
|
@ -184,6 +199,9 @@ class CurlFD(ExternalFD):
|
|||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
|
||||
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||
if cookie_header:
|
||||
cmd += ['--cookie', cookie_header]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', f'{key}: {val}']
|
||||
|
@ -214,6 +232,9 @@ class AxelFD(ExternalFD):
|
|||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['-H', f'{key}: {val}']
|
||||
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||
if cookie_header:
|
||||
cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
|
||||
cmd += self._configuration_args()
|
||||
cmd += ['--', info_dict['url']]
|
||||
return cmd
|
||||
|
@ -223,7 +244,9 @@ class WgetFD(ExternalFD):
|
|||
AVAILABLE_OPT = '--version'
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
|
||||
cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
|
||||
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||
cmd += ['--load-cookies', self._write_cookies()]
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', f'{key}: {val}']
|
||||
|
@ -271,7 +294,7 @@ class Aria2cFD(ExternalFD):
|
|||
return super()._call_downloader(tmpfilename, info_dict)
|
||||
|
||||
def _make_cmd(self, tmpfilename, info_dict):
|
||||
cmd = [self.exe, '-c',
|
||||
cmd = [self.exe, '-c', '--no-conf',
|
||||
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
|
||||
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
|
||||
if 'fragments' in info_dict:
|
||||
|
@ -279,6 +302,8 @@ class Aria2cFD(ExternalFD):
|
|||
else:
|
||||
cmd += ['--min-split-size', '1M']
|
||||
|
||||
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
|
||||
cmd += [f'--load-cookies={self._write_cookies()}']
|
||||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += ['--header', f'{key}: {val}']
|
||||
|
@ -333,13 +358,12 @@ class Aria2cFD(ExternalFD):
|
|||
'method': method,
|
||||
'params': [f'token:{rpc_secret}', *params],
|
||||
}).encode('utf-8')
|
||||
request = sanitized_Request(
|
||||
request = Request(
|
||||
f'http://localhost:{rpc_port}/jsonrpc',
|
||||
data=d, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': f'{len(d)}',
|
||||
'Ytdl-request-proxy': '__noproxy__',
|
||||
})
|
||||
}, proxies={'all': None})
|
||||
with self.ydl.urlopen(request) as r:
|
||||
resp = json.load(r)
|
||||
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
|
||||
|
@ -417,6 +441,14 @@ class HttpieFD(ExternalFD):
|
|||
if info_dict.get('http_headers') is not None:
|
||||
for key, val in info_dict['http_headers'].items():
|
||||
cmd += [f'{key}:{val}']
|
||||
|
||||
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
|
||||
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
|
||||
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
|
||||
# 2: https://httpie.io/docs/cli/sessions
|
||||
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
|
||||
if cookie_header:
|
||||
cmd += [f'Cookie:{cookie_header}']
|
||||
return cmd
|
||||
|
||||
|
||||
|
@ -527,7 +559,13 @@ class FFmpegFD(ExternalFD):
|
|||
|
||||
selected_formats = info_dict.get('requested_formats') or [info_dict]
|
||||
for i, fmt in enumerate(selected_formats):
|
||||
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
|
||||
is_http = re.match(r'^https?://', fmt['url'])
|
||||
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
|
||||
if cookies:
|
||||
args.extend(['-cookies', ''.join(
|
||||
f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
|
||||
for cookie in cookies)])
|
||||
if fmt.get('http_headers') and is_http:
|
||||
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
|
||||
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
|
||||
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
|
||||
|
|
|
@ -3,11 +3,11 @@ import io
|
|||
import itertools
|
||||
import struct
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import fix_xml_ampersands, xpath_text
|
||||
|
||||
|
||||
|
@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
|
|||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
man_url = urlh.url
|
||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
|
||||
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
|
||||
|
@ -407,8 +407,8 @@ class F4mFD(FragmentFD):
|
|||
if box_type == b'mdat':
|
||||
self._append_fragment(ctx, box_data)
|
||||
break
|
||||
except urllib.error.HTTPError as err:
|
||||
if live and (err.code == 404 or err.code == 410):
|
||||
except HTTPError as err:
|
||||
if live and (err.status == 404 or err.status == 410):
|
||||
# We didn't keep up with the live window. Continue
|
||||
# with the next available fragment.
|
||||
msg = 'Fragment %d unavailable' % frag_i
|
||||
|
|
|
@ -1,24 +1,20 @@
|
|||
import concurrent.futures
|
||||
import contextlib
|
||||
import http.client
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import struct
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_os_name
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
RetryManager,
|
||||
encodeFilename,
|
||||
sanitized_Request,
|
||||
traverse_obj,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError, IncompleteRead
|
||||
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
from ..utils.progress import ProgressCalculator
|
||||
|
||||
|
||||
class HttpQuietDownloader(HttpFD):
|
||||
|
@ -75,7 +71,7 @@ class FragmentFD(FileDownloader):
|
|||
|
||||
def _prepare_url(self, info_dict, url):
|
||||
headers = info_dict.get('http_headers')
|
||||
return sanitized_Request(url, None, headers) if headers else url
|
||||
return Request(url, None, headers) if headers else url
|
||||
|
||||
def _prepare_and_start_frag_download(self, ctx, info_dict):
|
||||
self._prepare_frag_download(ctx)
|
||||
|
@ -231,8 +227,7 @@ class FragmentFD(FileDownloader):
|
|||
resume_len = ctx['complete_frags_downloaded_bytes']
|
||||
total_frags = ctx['total_frags']
|
||||
ctx_id = ctx.get('ctx_id')
|
||||
# This dict stores the download progress, it's updated by the progress
|
||||
# hook
|
||||
# Stores the download progress, updated by the progress hook
|
||||
state = {
|
||||
'status': 'downloading',
|
||||
'downloaded_bytes': resume_len,
|
||||
|
@ -242,14 +237,8 @@ class FragmentFD(FileDownloader):
|
|||
'tmpfilename': ctx['tmpfilename'],
|
||||
}
|
||||
|
||||
start = time.time()
|
||||
ctx.update({
|
||||
'started': start,
|
||||
'fragment_started': start,
|
||||
# Amount of fragment's bytes downloaded by the time of the previous
|
||||
# frag progress hook invocation
|
||||
'prev_frag_downloaded_bytes': 0,
|
||||
})
|
||||
ctx['started'] = time.time()
|
||||
progress = ProgressCalculator(resume_len)
|
||||
|
||||
def frag_progress_hook(s):
|
||||
if s['status'] not in ('downloading', 'finished'):
|
||||
|
@ -264,45 +253,40 @@ class FragmentFD(FileDownloader):
|
|||
state['max_progress'] = ctx.get('max_progress')
|
||||
state['progress_idx'] = ctx.get('progress_idx')
|
||||
|
||||
time_now = time.time()
|
||||
state['elapsed'] = time_now - start
|
||||
state['elapsed'] = progress.elapsed
|
||||
frag_total_bytes = s.get('total_bytes') or 0
|
||||
s['fragment_info_dict'] = s.pop('info_dict', {})
|
||||
|
||||
# XXX: Fragment resume is not accounted for here
|
||||
if not ctx['live']:
|
||||
estimated_size = (
|
||||
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
|
||||
/ (state['fragment_index'] + 1) * total_frags)
|
||||
state['total_bytes_estimate'] = estimated_size
|
||||
progress.total = estimated_size
|
||||
progress.update(s.get('downloaded_bytes'))
|
||||
state['total_bytes_estimate'] = progress.total
|
||||
else:
|
||||
progress.update(s.get('downloaded_bytes'))
|
||||
|
||||
if s['status'] == 'finished':
|
||||
state['fragment_index'] += 1
|
||||
ctx['fragment_index'] = state['fragment_index']
|
||||
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_total_bytes)
|
||||
ctx['fragment_started'] = time.time()
|
||||
ctx['prev_frag_downloaded_bytes'] = 0
|
||||
else:
|
||||
frag_downloaded_bytes = s['downloaded_bytes']
|
||||
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
|
||||
ctx['speed'] = state['speed'] = self.calc_speed(
|
||||
ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx.get('frag_resume_len', 0))
|
||||
if not ctx['live']:
|
||||
state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
|
||||
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
|
||||
progress.thread_reset()
|
||||
|
||||
state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded
|
||||
state['speed'] = ctx['speed'] = progress.speed.smooth
|
||||
state['eta'] = progress.eta.smooth
|
||||
|
||||
self._hook_progress(state, info_dict)
|
||||
|
||||
ctx['dl'].add_progress_hook(frag_progress_hook)
|
||||
|
||||
return start
|
||||
return ctx['started']
|
||||
|
||||
def _finish_frag_download(self, ctx, info_dict):
|
||||
ctx['dest_stream'].close()
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
|
||||
if os.path.isfile(ytdl_filename):
|
||||
self.try_remove(ytdl_filename)
|
||||
self.try_remove(self.ytdl_filename(ctx['filename']))
|
||||
elapsed = time.time() - ctx['started']
|
||||
|
||||
to_file = ctx['tmpfilename'] != '-'
|
||||
|
@ -459,7 +443,7 @@ class FragmentFD(FileDownloader):
|
|||
|
||||
frag_index = ctx['fragment_index'] = fragment['frag_index']
|
||||
ctx['last_error'] = None
|
||||
headers = info_dict.get('http_headers', {}).copy()
|
||||
headers = HTTPHeaderDict(info_dict.get('http_headers'))
|
||||
byte_range = fragment.get('byte_range')
|
||||
if byte_range:
|
||||
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
|
||||
|
@ -479,7 +463,7 @@ class FragmentFD(FileDownloader):
|
|||
if not self._download_fragment(
|
||||
ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
|
||||
return
|
||||
except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
|
||||
except (HTTPError, IncompleteRead) as err:
|
||||
retry.error = err
|
||||
continue
|
||||
except DownloadError: # has own retry settings
|
||||
|
@ -507,7 +491,6 @@ class FragmentFD(FileDownloader):
|
|||
download_fragment(fragment, ctx_copy)
|
||||
return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
|
||||
|
||||
self.report_warning('The download speed shown is only of one thread. This is a known issue')
|
||||
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
|
||||
try:
|
||||
for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
|
||||
|
|
|
@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
|
|||
FD_NAME = 'hlsnative'
|
||||
|
||||
@staticmethod
|
||||
def can_download(manifest, info_dict, allow_unplayable_formats=False):
|
||||
def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
|
||||
return bool(re.search('|'.join((
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
)), manifest))
|
||||
|
||||
@classmethod
|
||||
def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
|
||||
UNSUPPORTED_FEATURES = [
|
||||
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
|
||||
|
||||
|
@ -50,13 +59,15 @@ class HlsFD(FragmentFD):
|
|||
]
|
||||
if not allow_unplayable_formats:
|
||||
UNSUPPORTED_FEATURES += [
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
|
||||
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
|
||||
]
|
||||
|
||||
def check_results():
|
||||
yield not info_dict.get('is_live')
|
||||
for feature in UNSUPPORTED_FEATURES:
|
||||
yield not re.search(feature, manifest)
|
||||
if not allow_unplayable_formats:
|
||||
yield not cls._has_drm(manifest)
|
||||
return all(check_results())
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
|
@ -64,7 +75,7 @@ class HlsFD(FragmentFD):
|
|||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.geturl()
|
||||
man_url = urlh.url
|
||||
s = urlh.read().decode('utf-8', 'ignore')
|
||||
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
|
@ -81,14 +92,13 @@ class HlsFD(FragmentFD):
|
|||
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
|
||||
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
|
||||
if not can_download:
|
||||
has_drm = re.search('|'.join([
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
]), s)
|
||||
if has_drm and not self.params.get('allow_unplayable_formats'):
|
||||
self.report_error(
|
||||
'This video is DRM protected; Try selecting another format with --format or '
|
||||
'add --check-formats to automatically fallback to the next best format')
|
||||
if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
|
||||
if info_dict.get('has_drm') and self.params.get('test'):
|
||||
self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
|
||||
else:
|
||||
self.report_error(
|
||||
'This format is DRM protected; Try selecting another format with --format or '
|
||||
'add --check-formats to automatically fallback to the next best format', tb=False)
|
||||
return False
|
||||
message = message or 'Unsupported features have been detected'
|
||||
fd = FFmpegFD(self.ydl, self.params)
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
import http.client
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import ssl
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import (
|
||||
CertificateVerifyError,
|
||||
HTTPError,
|
||||
TransportError,
|
||||
)
|
||||
from ..utils import (
|
||||
ContentTooShortError,
|
||||
RetryManager,
|
||||
|
@ -16,18 +18,10 @@ from ..utils import (
|
|||
encodeFilename,
|
||||
int_or_none,
|
||||
parse_http_range,
|
||||
sanitized_Request,
|
||||
try_call,
|
||||
write_xattr,
|
||||
)
|
||||
|
||||
RESPONSE_READ_EXCEPTIONS = (
|
||||
TimeoutError,
|
||||
socket.timeout, # compat: py < 3.10
|
||||
ConnectionError,
|
||||
ssl.SSLError,
|
||||
http.client.HTTPException
|
||||
)
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
|
||||
|
||||
class HttpFD(FileDownloader):
|
||||
|
@ -46,10 +40,7 @@ class HttpFD(FileDownloader):
|
|||
ctx.stream = None
|
||||
|
||||
# Disable compression
|
||||
headers = {'Accept-Encoding': 'identity'}
|
||||
add_headers = info_dict.get('http_headers')
|
||||
if add_headers:
|
||||
headers.update(add_headers)
|
||||
headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))
|
||||
|
||||
is_test = self.params.get('test', False)
|
||||
chunk_size = self._TEST_FILE_SIZE if is_test else (
|
||||
|
@ -120,10 +111,10 @@ class HttpFD(FileDownloader):
|
|||
if try_call(lambda: range_end >= ctx.content_len):
|
||||
range_end = ctx.content_len - 1
|
||||
|
||||
request = sanitized_Request(url, request_data, headers)
|
||||
request = Request(url, request_data, headers)
|
||||
has_range = range_start is not None
|
||||
if has_range:
|
||||
request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
|
||||
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
|
||||
# Establish connection
|
||||
try:
|
||||
ctx.data = self.ydl.urlopen(request)
|
||||
|
@ -154,17 +145,17 @@ class HttpFD(FileDownloader):
|
|||
self.report_unable_to_resume()
|
||||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
|
||||
except urllib.error.HTTPError as err:
|
||||
if err.code == 416:
|
||||
ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
|
||||
except HTTPError as err:
|
||||
if err.status == 416:
|
||||
# Unable to resume (requested range not satisfiable)
|
||||
try:
|
||||
# Open the connection again without the range header
|
||||
ctx.data = self.ydl.urlopen(
|
||||
sanitized_Request(url, request_data, headers))
|
||||
content_length = ctx.data.info()['Content-Length']
|
||||
except urllib.error.HTTPError as err:
|
||||
if err.code < 500 or err.code >= 600:
|
||||
Request(url, request_data, headers))
|
||||
content_length = ctx.data.headers['Content-Length']
|
||||
except HTTPError as err:
|
||||
if err.status < 500 or err.status >= 600:
|
||||
raise
|
||||
else:
|
||||
# Examine the reported length
|
||||
|
@ -192,17 +183,13 @@ class HttpFD(FileDownloader):
|
|||
ctx.resume_len = 0
|
||||
ctx.open_mode = 'wb'
|
||||
return
|
||||
elif err.code < 500 or err.code >= 600:
|
||||
elif err.status < 500 or err.status >= 600:
|
||||
# Unexpected HTTP error
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
except urllib.error.URLError as err:
|
||||
if isinstance(err.reason, ssl.CertificateError):
|
||||
raise
|
||||
raise RetryDownload(err)
|
||||
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
|
||||
# Any errors that occur during this will not be wrapped by URLError
|
||||
except RESPONSE_READ_EXCEPTIONS as err:
|
||||
except CertificateVerifyError:
|
||||
raise
|
||||
except TransportError as err:
|
||||
raise RetryDownload(err)
|
||||
|
||||
def close_stream():
|
||||
|
@ -212,9 +199,9 @@ class HttpFD(FileDownloader):
|
|||
ctx.stream = None
|
||||
|
||||
def download():
|
||||
data_len = ctx.data.info().get('Content-length')
|
||||
data_len = ctx.data.headers.get('Content-length')
|
||||
|
||||
if ctx.data.info().get('Content-encoding'):
|
||||
if ctx.data.headers.get('Content-encoding'):
|
||||
# Content-encoding is present, Content-length is not reliable anymore as we are
|
||||
# doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
|
||||
data_len = None
|
||||
|
@ -258,7 +245,7 @@ class HttpFD(FileDownloader):
|
|||
try:
|
||||
# Download and write
|
||||
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
|
||||
except RESPONSE_READ_EXCEPTIONS as err:
|
||||
except TransportError as err:
|
||||
retry(err)
|
||||
|
||||
byte_counter += len(data_block)
|
||||
|
@ -339,15 +326,15 @@ class HttpFD(FileDownloader):
|
|||
elif speed:
|
||||
ctx.throttle_start = None
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
# ctx.block_size = block_size
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.stream is None:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('Did not get any data blocks')
|
||||
return False
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
raise NextFragment()
|
||||
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
||||
|
@ -359,7 +346,7 @@ class HttpFD(FileDownloader):
|
|||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': byte_counter,
|
||||
|
|
|
@ -2,9 +2,9 @@ import binascii
|
|||
import io
|
||||
import struct
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import RetryManager
|
||||
|
||||
u8 = struct.Struct('>B')
|
||||
|
@ -271,7 +271,7 @@ class IsmFD(FragmentFD):
|
|||
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
|
||||
extra_state['ism_track_written'] = True
|
||||
self._append_fragment(ctx, frag_content)
|
||||
except urllib.error.HTTPError as err:
|
||||
except HTTPError as err:
|
||||
retry.error = err
|
||||
continue
|
||||
|
||||
|
|
|
@ -5,13 +5,8 @@ import time
|
|||
from . import get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..utils import (
|
||||
DownloadError,
|
||||
WebSocketsWrapper,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
|
||||
|
@ -33,7 +28,7 @@ class NiconicoDmcFD(FileDownloader):
|
|||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||
|
||||
request = sanitized_Request(heartbeat_url, heartbeat_data)
|
||||
request = Request(heartbeat_url, heartbeat_data)
|
||||
|
||||
def heartbeat():
|
||||
try:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import json
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .fragment import FragmentFD
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
RegexNotFoundError,
|
||||
RetryManager,
|
||||
|
@ -10,6 +10,7 @@ from ..utils import (
|
|||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
|
||||
|
||||
class YoutubeLiveChatFD(FragmentFD):
|
||||
|
@ -37,10 +38,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
|||
start_time = int(time.time() * 1000)
|
||||
|
||||
def dl_fragment(url, data=None, headers=None):
|
||||
http_headers = info_dict.get('http_headers', {})
|
||||
if headers:
|
||||
http_headers = http_headers.copy()
|
||||
http_headers.update(headers)
|
||||
http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
|
||||
return self._download_fragment(ctx, url, info_dict, http_headers, data)
|
||||
|
||||
def parse_actions_replay(live_chat_continuation):
|
||||
|
@ -129,7 +127,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
|||
or frag_index == 1 and try_refresh_replay_beginning
|
||||
or parse_actions_replay)
|
||||
return (True, *func(live_chat_continuation))
|
||||
except urllib.error.HTTPError as err:
|
||||
except HTTPError as err:
|
||||
retry.error = err
|
||||
continue
|
||||
return False, None, None, None
|
||||
|
|
|
@ -15,7 +15,6 @@ from .youtube import ( # Youtube is moved to the top to improve performance
|
|||
YoutubeSearchURLIE,
|
||||
YoutubeMusicSearchURLIE,
|
||||
YoutubeSubscriptionsIE,
|
||||
YoutubeStoriesIE,
|
||||
YoutubeTruncatedIDIE,
|
||||
YoutubeTruncatedURLIE,
|
||||
YoutubeYtBeIE,
|
||||
|
@ -123,7 +122,6 @@ from .applepodcasts import ApplePodcastsIE
|
|||
from .archiveorg import (
|
||||
ArchiveOrgIE,
|
||||
YoutubeWebArchiveIE,
|
||||
VLiveWebArchiveIE,
|
||||
)
|
||||
from .arcpublishing import ArcPublishingIE
|
||||
from .arkena import ArkenaIE
|
||||
|
@ -139,10 +137,6 @@ from .arte import (
|
|||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asiancrush import (
|
||||
AsianCrushIE,
|
||||
AsianCrushPlaylistIE,
|
||||
)
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atttechchannel import ATTTechChannelIE
|
||||
|
@ -166,6 +160,7 @@ from .awaan import (
|
|||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .axs import AxsIE
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .banbye import (
|
||||
|
@ -215,6 +210,7 @@ from .bild import BildIE
|
|||
from .bilibili import (
|
||||
BiliBiliIE,
|
||||
BiliBiliBangumiIE,
|
||||
BiliBiliBangumiSeasonIE,
|
||||
BiliBiliBangumiMediaIE,
|
||||
BiliBiliSearchIE,
|
||||
BilibiliCategoryIE,
|
||||
|
@ -223,7 +219,11 @@ from .bilibili import (
|
|||
BiliBiliPlayerIE,
|
||||
BilibiliSpaceVideoIE,
|
||||
BilibiliSpaceAudioIE,
|
||||
BilibiliSpacePlaylistIE,
|
||||
BilibiliCollectionListIE,
|
||||
BilibiliSeriesListIE,
|
||||
BilibiliFavoritesListIE,
|
||||
BilibiliWatchlaterIE,
|
||||
BilibiliPlaylistIE,
|
||||
BiliIntlIE,
|
||||
BiliIntlSeriesIE,
|
||||
BiliLiveIE,
|
||||
|
@ -271,6 +271,10 @@ from .brightcove import (
|
|||
BrightcoveLegacyIE,
|
||||
BrightcoveNewIE,
|
||||
)
|
||||
from .brilliantpala import (
|
||||
BrilliantpalaElearnIE,
|
||||
BrilliantpalaClassesIE,
|
||||
)
|
||||
from .businessinsider import BusinessInsiderIE
|
||||
from .bundesliga import BundesligaIE
|
||||
from .buzzfeed import BuzzFeedIE
|
||||
|
@ -292,9 +296,11 @@ from .cammodels import CamModelsIE
|
|||
from .camsoda import CamsodaIE
|
||||
from .camtasia import CamtasiaEmbedIE
|
||||
from .camwithher import CamWithHerIE
|
||||
from .canal1 import Canal1IE
|
||||
from .canalalpha import CanalAlphaIE
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalc2 import Canalc2IE
|
||||
from .caracoltv import CaracolTvPlayIE
|
||||
from .carambatv import (
|
||||
CarambaTVIE,
|
||||
CarambaTVPageIE,
|
||||
|
@ -303,6 +309,7 @@ from .cartoonnetwork import CartoonNetworkIE
|
|||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCPlayerIE,
|
||||
CBCPlayerPlaylistIE,
|
||||
CBCGemIE,
|
||||
CBCGemPlaylistIE,
|
||||
CBCGemLiveIE,
|
||||
|
@ -351,6 +358,10 @@ from .chirbit import (
|
|||
from .cinchcast import CinchcastIE
|
||||
from .cinemax import CinemaxIE
|
||||
from .cinetecamilano import CinetecaMilanoIE
|
||||
from .cineverse import (
|
||||
CineverseIE,
|
||||
CineverseDetailsIE,
|
||||
)
|
||||
from .ciscolive import (
|
||||
CiscoLiveSessionIE,
|
||||
CiscoLiveSearchIE,
|
||||
|
@ -560,8 +571,10 @@ from .epicon import (
|
|||
EpiconIE,
|
||||
EpiconSeriesIE,
|
||||
)
|
||||
from .eplus import EplusIbIE
|
||||
from .epoch import EpochIE
|
||||
from .eporner import EpornerIE
|
||||
from .erocast import ErocastIE
|
||||
from .eroprofile import (
|
||||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
|
@ -939,6 +952,7 @@ from .lastfm import (
|
|||
from .lbry import (
|
||||
LBRYIE,
|
||||
LBRYChannelIE,
|
||||
LBRYPlaylistIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
from .lcp import (
|
||||
|
@ -1012,6 +1026,7 @@ from .lynda import (
|
|||
LyndaCourseIE
|
||||
)
|
||||
from .m6 import M6IE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
|
@ -1117,6 +1132,7 @@ from .mofosex import (
|
|||
MofosexEmbedIE,
|
||||
)
|
||||
from .mojvideo import MojvideoIE
|
||||
from .monstercat import MonstercatIE
|
||||
from .morningstar import MorningstarIE
|
||||
from .motherless import (
|
||||
MotherlessIE,
|
||||
|
@ -1141,6 +1157,7 @@ from .mtv import (
|
|||
)
|
||||
from .muenchentv import MuenchenTVIE
|
||||
from .murrtube import MurrtubeIE, MurrtubeUserIE
|
||||
from .museai import MuseAIIE
|
||||
from .musescore import MuseScoreIE
|
||||
from .musicdex import (
|
||||
MusicdexSongIE,
|
||||
|
@ -1288,6 +1305,11 @@ from .ninecninemedia import (
|
|||
NineCNineMediaIE,
|
||||
CPTwentyFourIE,
|
||||
)
|
||||
from .niconicochannelplus import (
|
||||
NiconicoChannelPlusIE,
|
||||
NiconicoChannelPlusChannelVideosIE,
|
||||
NiconicoChannelPlusChannelLivesIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
|
@ -1418,7 +1440,7 @@ from .patreon import (
|
|||
PatreonIE,
|
||||
PatreonCampaignIE
|
||||
)
|
||||
from .pbs import PBSIE
|
||||
from .pbs import PBSIE, PBSKidsIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peekvids import PeekVidsIE, PlayVidsIE
|
||||
from .peertube import (
|
||||
|
@ -1441,6 +1463,7 @@ from .philharmoniedeparis import PhilharmonieDeParisIE
|
|||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .piapro import PiaproIE
|
||||
from .piaulizaportal import PIAULIZAPortalIE
|
||||
from .picarto import (
|
||||
PicartoIE,
|
||||
PicartoVodIE,
|
||||
|
@ -1498,6 +1521,7 @@ from .polskieradio import (
|
|||
from .popcorntimes import PopcorntimesIE
|
||||
from .popcorntv import PopcornTVIE
|
||||
from .porn91 import Porn91IE
|
||||
from .pornbox import PornboxIE
|
||||
from .porncom import PornComIE
|
||||
from .pornflip import PornFlipIE
|
||||
from .pornhd import PornHdIE
|
||||
|
@ -1516,7 +1540,7 @@ from .puhutv import (
|
|||
PuhuTVIE,
|
||||
PuhuTVSerieIE,
|
||||
)
|
||||
from .pr0gramm import Pr0grammStaticIE, Pr0grammIE
|
||||
from .pr0gramm import Pr0grammIE
|
||||
from .prankcast import PrankCastIE
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
|
@ -1531,6 +1555,7 @@ from .prx import (
|
|||
)
|
||||
from .puls4 import Puls4IE
|
||||
from .pyvideo import PyvideoIE
|
||||
from .qdance import QDanceIE
|
||||
from .qingting import QingTingIE
|
||||
from .qqmusic import (
|
||||
QQMusicIE,
|
||||
|
@ -1551,7 +1576,14 @@ from .radiocanada import (
|
|||
from .radiode import RadioDeIE
|
||||
from .radiojavan import RadioJavanIE
|
||||
from .radiobremen import RadioBremenIE
|
||||
from .radiofrance import FranceCultureIE, RadioFranceIE
|
||||
from .radiofrance import (
|
||||
FranceCultureIE,
|
||||
RadioFranceIE,
|
||||
RadioFranceLiveIE,
|
||||
RadioFrancePodcastIE,
|
||||
RadioFranceProfileIE,
|
||||
RadioFranceProgramScheduleIE,
|
||||
)
|
||||
from .radiozet import RadioZetPodcastIE
|
||||
from .radiokapital import (
|
||||
RadioKapitalIE,
|
||||
|
@ -1582,6 +1614,7 @@ from .rbmaradio import RBMARadioIE
|
|||
from .rbgtum import (
|
||||
RbgTumIE,
|
||||
RbgTumCourseIE,
|
||||
RbgTumNewCourseIE,
|
||||
)
|
||||
from .rcs import (
|
||||
RCSIE,
|
||||
|
@ -1695,8 +1728,8 @@ from .megatvcom import (
|
|||
MegaTVComIE,
|
||||
MegaTVComEmbedIE,
|
||||
)
|
||||
from .ant1newsgr import (
|
||||
Ant1NewsGrWatchIE,
|
||||
from .antenna import (
|
||||
AntennaGrWatchIE,
|
||||
Ant1NewsGrArticleIE,
|
||||
Ant1NewsGrEmbedIE,
|
||||
)
|
||||
|
@ -1706,6 +1739,10 @@ from .ruv import (
|
|||
RuvIE,
|
||||
RuvSpilaIE
|
||||
)
|
||||
from .s4c import (
|
||||
S4CIE,
|
||||
S4CSeriesIE
|
||||
)
|
||||
from .safari import (
|
||||
SafariIE,
|
||||
SafariApiIE,
|
||||
|
@ -1786,7 +1823,10 @@ from .slideslive import SlidesLiveIE
|
|||
from .slutload import SlutloadIE
|
||||
from .smotrim import SmotrimIE
|
||||
from .snotr import SnotrIE
|
||||
from .sohu import SohuIE
|
||||
from .sohu import (
|
||||
SohuIE,
|
||||
SohuVIE,
|
||||
)
|
||||
from .sonyliv import (
|
||||
SonyLIVIE,
|
||||
SonyLIVSeriesIE,
|
||||
|
@ -1854,6 +1894,10 @@ from .srgssr import (
|
|||
SRGSSRPlayIE,
|
||||
)
|
||||
from .srmediathek import SRMediathekIE
|
||||
from .stacommu import (
|
||||
StacommuLiveIE,
|
||||
StacommuVODIE,
|
||||
)
|
||||
from .stanfordoc import StanfordOpenClassroomIE
|
||||
from .startv import StarTVIE
|
||||
from .steam import (
|
||||
|
@ -1866,7 +1910,6 @@ from .storyfire import (
|
|||
StoryFireSeriesIE,
|
||||
)
|
||||
from .streamable import StreamableIE
|
||||
from .streamanity import StreamanityIE
|
||||
from .streamcloud import StreamcloudIE
|
||||
from .streamcz import StreamCZIE
|
||||
from .streamff import StreamFFIE
|
||||
|
@ -1894,6 +1937,11 @@ from .sztvhu import SztvHuIE
|
|||
from .tagesschau import TagesschauIE
|
||||
from .tass import TassIE
|
||||
from .tbs import TBSIE
|
||||
from .tbsjp import (
|
||||
TBSJPEpisodeIE,
|
||||
TBSJPProgramIE,
|
||||
TBSJPPlaylistIE,
|
||||
)
|
||||
from .tdslifeway import TDSLifewayIE
|
||||
from .teachable import (
|
||||
TeachableIE,
|
||||
|
@ -1956,10 +2004,6 @@ from .theplatform import (
|
|||
)
|
||||
from .thestar import TheStarIE
|
||||
from .thesun import TheSunIE
|
||||
from .theta import (
|
||||
ThetaVideoIE,
|
||||
ThetaStreamIE,
|
||||
)
|
||||
from .theweatherchannel import TheWeatherChannelIE
|
||||
from .thisamericanlife import ThisAmericanLifeIE
|
||||
from .thisav import ThisAVIE
|
||||
|
@ -2267,6 +2311,8 @@ from .vk import (
|
|||
VKIE,
|
||||
VKUserVideosIE,
|
||||
VKWallPostIE,
|
||||
VKPlayIE,
|
||||
VKPlayLiveIE,
|
||||
)
|
||||
from .vocaroo import VocarooIE
|
||||
from .vodlocker import VodlockerIE
|
||||
|
@ -2339,7 +2385,8 @@ from .webofstories import (
|
|||
)
|
||||
from .weibo import (
|
||||
WeiboIE,
|
||||
WeiboMobileIE
|
||||
WeiboVideoIE,
|
||||
WeiboUserIE,
|
||||
)
|
||||
from .weiqitv import WeiqiTVIE
|
||||
from .weverse import (
|
||||
|
@ -2355,6 +2402,7 @@ from .weyyak import WeyyakIE
|
|||
from .whyp import WhypIE
|
||||
from .wikimedia import WikimediaIE
|
||||
from .willow import WillowIE
|
||||
from .wimbledon import WimbledonIE
|
||||
from .wimtv import WimTVIE
|
||||
from .whowatch import WhoWatchIE
|
||||
from .wistia import (
|
||||
|
|
|
@ -12,6 +12,7 @@ from ..utils import (
|
|||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
|
@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
|
|||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
'id': '102520540',
|
||||
'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
|
|||
video = True
|
||||
|
||||
if mobj is None:
|
||||
mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage)
|
||||
mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
|
||||
if mobj is None:
|
||||
mobj = re.search(
|
||||
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
|
||||
|
@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
|
|||
urls_info = self._parse_json(
|
||||
mobj.group('json_data'), video_id, transform_source=js_to_json)
|
||||
youtube = mobj.group('type') == 'YouTube'
|
||||
video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4'
|
||||
video = mobj.group('type') == 'Video' or traverse_obj(
|
||||
urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
|
||||
|
||||
if not isinstance(urls_info, list):
|
||||
urls_info = [urls_info]
|
||||
|
@ -169,20 +180,103 @@ class ABCIViewIE(InfoExtractor):
|
|||
_VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
|
||||
_GEO_COUNTRIES = ['AU']
|
||||
|
||||
# ABC iview programs are normally available for 14 days only.
|
||||
_TESTS = [{
|
||||
'url': 'https://iview.abc.net.au/show/utopia/series/1/video/CO1211V001S00',
|
||||
'md5': '52a942bfd7a0b79a6bfe9b4ce6c9d0ed',
|
||||
'info_dict': {
|
||||
'id': 'CO1211V001S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 1 Ep 1 Wood For The Trees',
|
||||
'series': 'Utopia',
|
||||
'description': 'md5:0cfb2c183c1b952d1548fd65c8a95c00',
|
||||
'upload_date': '20230726',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'CO1211V',
|
||||
'episode_id': 'CO1211V001S00',
|
||||
'season_number': 1,
|
||||
'season': 'Season 1',
|
||||
'episode_number': 1,
|
||||
'episode': 'Wood For The Trees',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/co/CO1211V001S00_5ad8353f4df09_1280.jpg',
|
||||
'timestamp': 1690403700,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name',
|
||||
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||
'info_dict': {
|
||||
'id': 'LE1927H001S00',
|
||||
'ext': 'mp4',
|
||||
'title': "Series 11 Ep 1",
|
||||
'series': "Gruen",
|
||||
'title': 'Series 11 Ep 1',
|
||||
'series': 'Gruen',
|
||||
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||
'upload_date': '20190925',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'LE1927H',
|
||||
'episode_id': 'LE1927H001S00',
|
||||
'season_number': 11,
|
||||
'season': 'Season 11',
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/le/LE1927H001S00_5d954fbd79e25_1280.jpg',
|
||||
'timestamp': 1569445289,
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode number',
|
||||
'url': 'https://iview.abc.net.au/show/four-corners/series/2022/video/NC2203H039S00',
|
||||
'md5': '77cb7d8434440e3b28fbebe331c2456a',
|
||||
'info_dict': {
|
||||
'id': 'NC2203H039S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2022 Locking Up Kids',
|
||||
'series': 'Four Corners',
|
||||
'description': 'md5:54829ca108846d1a70e1fcce2853e720',
|
||||
'upload_date': '20221114',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'NC2203H',
|
||||
'episode_id': 'NC2203H039S00',
|
||||
'season_number': 2022,
|
||||
'season': 'Season 2022',
|
||||
'episode_number': None,
|
||||
'episode': 'Locking Up Kids',
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
|
||||
'timestamp': 1668460497,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'note': 'No episode name or number',
|
||||
'url': 'https://iview.abc.net.au/show/landline/series/2021/video/RF2004Q043S00',
|
||||
'md5': '2e17dec06b13cc81dc119d2565289396',
|
||||
'info_dict': {
|
||||
'id': 'RF2004Q043S00',
|
||||
'ext': 'mp4',
|
||||
'title': 'Series 2021',
|
||||
'series': 'Landline',
|
||||
'description': 'md5:c9f30d9c0c914a7fd23842f6240be014',
|
||||
'upload_date': '20211205',
|
||||
'uploader_id': 'abc1',
|
||||
'series_id': 'RF2004Q',
|
||||
'episode_id': 'RF2004Q043S00',
|
||||
'season_number': 2021,
|
||||
'season': 'Season 2021',
|
||||
'episode_number': None,
|
||||
'episode': None,
|
||||
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
|
||||
'timestamp': 1638710705,
|
||||
|
||||
},
|
||||
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
|
@ -244,6 +338,8 @@ class ABCIViewIE(InfoExtractor):
|
|||
'episode_number': int_or_none(self._search_regex(
|
||||
r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
|
||||
'episode_id': house_number,
|
||||
'episode': self._search_regex(
|
||||
r'^(?:Series\s+\d+)?\s*(?:Ep\s+\d+)?\s*(.*)$', title, 'episode', default='') or None,
|
||||
'uploader_id': video_params.get('channel'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
|
|
@ -12,7 +12,7 @@ import urllib.parse
|
|||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
|
||||
from ..utils.networking import clean_proxies
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
|
@ -22,80 +22,26 @@ from ..utils import (
|
|||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
request_to_url,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
|
||||
|
||||
|
||||
def add_opener(ydl, handler):
|
||||
''' Add a handler for opening URLs, like _download_webpage '''
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
"""Add a handler for opening URLs, like _download_webpage"""
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
ydl._opener.add_handler(handler)
|
||||
|
||||
|
||||
def remove_opener(ydl, handler):
|
||||
'''
|
||||
Remove handler(s) for opening URLs
|
||||
@param handler Either handler object itself or handler type.
|
||||
Specifying handler type will remove all handler which isinstance returns True.
|
||||
'''
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
|
||||
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
|
||||
opener = ydl._opener
|
||||
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
|
||||
if isinstance(handler, (type, tuple)):
|
||||
find_cp = lambda x: isinstance(x, handler)
|
||||
else:
|
||||
find_cp = lambda x: x is handler
|
||||
|
||||
removed = []
|
||||
for meth in dir(handler):
|
||||
if meth in ["redirect_request", "do_open", "proxy_open"]:
|
||||
# oops, coincidental match
|
||||
continue
|
||||
|
||||
i = meth.find("_")
|
||||
protocol = meth[:i]
|
||||
condition = meth[i + 1:]
|
||||
|
||||
if condition.startswith("error"):
|
||||
j = condition.find("_") + i + 1
|
||||
kind = meth[j + 1:]
|
||||
try:
|
||||
kind = int(kind)
|
||||
except ValueError:
|
||||
pass
|
||||
lookup = opener.handle_error.get(protocol, {})
|
||||
opener.handle_error[protocol] = lookup
|
||||
elif condition == "open":
|
||||
kind = protocol
|
||||
lookup = opener.handle_open
|
||||
elif condition == "response":
|
||||
kind = protocol
|
||||
lookup = opener.process_response
|
||||
elif condition == "request":
|
||||
kind = protocol
|
||||
lookup = opener.process_request
|
||||
else:
|
||||
continue
|
||||
|
||||
handlers = lookup.setdefault(kind, [])
|
||||
if handlers:
|
||||
handlers[:] = [x for x in handlers if not find_cp(x)]
|
||||
|
||||
removed.append(x for x in handlers if find_cp(x))
|
||||
|
||||
if removed:
|
||||
for x in opener.handlers:
|
||||
if find_cp(x):
|
||||
x.add_parent(None)
|
||||
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
|
||||
rh = ydl._request_director.handlers['Urllib']
|
||||
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
|
||||
return
|
||||
headers = ydl.params['http_headers'].copy()
|
||||
proxies = ydl.proxies.copy()
|
||||
clean_proxies(proxies, headers)
|
||||
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
|
||||
assert isinstance(opener, urllib.request.OpenerDirector)
|
||||
opener.add_handler(handler)
|
||||
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
|
||||
|
||||
|
||||
class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
|
@ -137,11 +83,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
|||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
def abematv_license_open(self, url):
|
||||
url = request_to_url(url)
|
||||
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
|
||||
ticket = urllib.parse.urlparse(url).netloc
|
||||
response_data = self._get_videokey_from_ticket(ticket)
|
||||
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
|
||||
'Content-Length': len(response_data),
|
||||
'Content-Length': str(len(response_data)),
|
||||
}, url=url, code=200)
|
||||
|
||||
|
||||
|
@ -213,10 +159,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
|||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
# don't allow adding it 2 times or more, though it's guarded
|
||||
remove_opener(self._downloader, AbemaLicenseHandler)
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
|
|
|
@ -6,10 +6,8 @@ import random
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_b64decode,
|
||||
)
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
|
@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
|||
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||
except ExtractorError as e:
|
||||
message = None
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
resp = self._parse_json(
|
||||
e.cause.read().decode(), None, fatal=False) or {}
|
||||
e.cause.response.read().decode(), None, fatal=False) or {}
|
||||
message = resp.get('message') or resp.get('code')
|
||||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
|
@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
|||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, compat_HTTPError):
|
||||
if not isinstance(e.cause, HTTPError):
|
||||
raise e
|
||||
|
||||
if e.cause.code == 401:
|
||||
if e.cause.status == 401:
|
||||
# This usually goes away with a different random pkcs1pad, so retry
|
||||
continue
|
||||
|
||||
error = self._parse_json(e.cause.read(), video_id)
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
|
|
@ -2,11 +2,11 @@ import getpass
|
|||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
|
@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
form_page, urlh = form_page_res
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
if not re.match(r'https?://', post_url):
|
||||
post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
|
||||
post_url = compat_urlparse.urljoin(urlh.url, post_url)
|
||||
form_data = self._hidden_inputs(form_page)
|
||||
form_data.update(data)
|
||||
return self._download_webpage_handle(
|
||||
|
@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
elif 'automatically signed in with' in provider_redirect_page:
|
||||
# Seems like comcast is rolling up new way of automatically signing customers
|
||||
oauth_redirect_url = self._html_search_regex(
|
||||
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
|
||||
'oauth redirect (signed)')
|
||||
# Just need to process the request. No useful data comes back
|
||||
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
|
||||
|
@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
hidden_data['history'] = 1
|
||||
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending first bookend',
|
||||
urlh.url, video_id, 'Sending first bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_association_redirect, urlh = post_form(
|
||||
|
@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
|
@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
hidden_data['history'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending final bookend',
|
||||
urlh.url, video_id, 'Sending final bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
hidden_data['history_val'] = 1
|
||||
|
||||
provider_login_redirect_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending First Bookend',
|
||||
urlh.url, video_id, 'Sending First Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
provider_login_redirect_page, urlh = provider_login_redirect_page_res
|
||||
|
@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
})
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.geturl())
|
||||
provider_association_redirect, url=urlh.url)
|
||||
|
||||
last_bookend_page, urlh = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
|
@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
hidden_data['history_val'] = 3
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle(
|
||||
urlh.geturl(), video_id, 'Sending Final Bookend',
|
||||
urlh.url, video_id, 'Sending Final Bookend',
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
# based redirect that should be followed.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, url=urlh.geturl())
|
||||
provider_redirect_page, url=urlh.url)
|
||||
if provider_refresh_redirect_url:
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
provider_refresh_redirect_url, video_id,
|
||||
|
@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
|||
'requestor_id': requestor_id,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401:
|
||||
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
raise_mvpd_required()
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
|
|
|
@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
|
|||
continue
|
||||
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
|
||||
if ext == 'm3u8':
|
||||
info['formats'].extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
info['formats'].extend(fmts)
|
||||
self._merge_subtitles(subs, target=info['subtitles'])
|
||||
elif ext == 'f4m':
|
||||
continue
|
||||
# info['formats'].extend(self._extract_f4m_formats(
|
||||
|
|
|
@ -338,6 +338,7 @@ class BiographyIE(AENetworksBaseIE):
|
|||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -22,8 +22,11 @@ class AmazonMiniTVBaseIE(InfoExtractor):
|
|||
|
||||
resp = self._download_json(
|
||||
f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
|
||||
asin, note=note, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps(data).encode() if data else None,
|
||||
asin, note=note, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'currentpageurl': '/',
|
||||
'currentplatform': 'dWeb'
|
||||
}, data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
'contentId': asin,
|
||||
|
@ -46,7 +49,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
|||
'ext': 'mp4',
|
||||
'title': 'May I Kiss You?',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'description': 'md5:a549bfc747973e04feb707833474e59d',
|
||||
'release_timestamp': 1644710400,
|
||||
'release_date': '20220213',
|
||||
|
@ -68,7 +71,7 @@ class AmazonMiniTVIE(AmazonMiniTVBaseIE):
|
|||
'ext': 'mp4',
|
||||
'title': 'Jahaan',
|
||||
'language': 'Hindi',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||
'description': 'md5:05eb765a77bf703f322f120ec6867339',
|
||||
'release_timestamp': 1647475200,
|
||||
'release_date': '20220317',
|
||||
|
|
|
@ -26,6 +26,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
|||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
|
|
|
@ -1,26 +1,30 @@
|
|||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
make_archive_id,
|
||||
scale_thumbnails_to_max_format_width,
|
||||
)
|
||||
|
||||
|
||||
class Ant1NewsGrBaseIE(InfoExtractor):
|
||||
class AntennaBaseIE(InfoExtractor):
|
||||
def _download_and_extract_api_data(self, video_id, netloc, cid=None):
|
||||
url = f'{self.http_scheme()}//{netloc}{self._API_PATH}'
|
||||
info = self._download_json(url, video_id, query={'cid': cid or video_id})
|
||||
try:
|
||||
source = info['url']
|
||||
except KeyError:
|
||||
raise ExtractorError('no source found for %s' % video_id)
|
||||
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
|
||||
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
|
||||
info = self._download_json(f'{self.http_scheme()}//{netloc}{self._API_PATH}',
|
||||
video_id, query={'cid': cid or video_id})
|
||||
if not info.get('url'):
|
||||
raise ExtractorError(f'No source found for {video_id}')
|
||||
|
||||
ext = determine_ext(info['url'])
|
||||
if ext == 'm3u8':
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(info['url'], video_id, 'mp4')
|
||||
else:
|
||||
formats, subs = [{'url': info['url'], 'format_id': ext}], {}
|
||||
|
||||
thumbnails = scale_thumbnails_to_max_format_width(
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
|
||||
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') if info.get('thumb') else []
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': info.get('title'),
|
||||
|
@ -30,21 +34,31 @@ class Ant1NewsGrBaseIE(InfoExtractor):
|
|||
}
|
||||
|
||||
|
||||
class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
||||
IE_NAME = 'ant1newsgr:watch'
|
||||
IE_DESC = 'ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?ant1news\.gr)/watch/(?P<id>\d+)/'
|
||||
class AntennaGrWatchIE(AntennaBaseIE):
|
||||
IE_NAME = 'antenna:watch'
|
||||
IE_DESC = 'antenna.gr and ant1news.gr videos'
|
||||
_VALID_URL = r'https?://(?P<netloc>(?:www\.)?(?:antenna|ant1news)\.gr)/watch/(?P<id>\d+)/'
|
||||
_API_PATH = '/templates/data/player'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/watch/1506168/ant1-news-09112021-stis-18-45',
|
||||
'md5': '95925e6b32106754235f2417e0d2dfab',
|
||||
'md5': 'c472d9dd7cd233c63aff2ea42201cda6',
|
||||
'info_dict': {
|
||||
'id': '1506168',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:0ad00fa66ecf8aa233d26ab0dba7514a',
|
||||
'description': 'md5:18665af715a6dcfeac1d6153a44f16b0',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/26d46bf6-8158-4f02-b197-7096c714b2de.jpg',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/26d46bf6-8158-4f02-b197-7096c714b2de\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.antenna.gr/watch/1643812/oi-prodotes-epeisodio-01',
|
||||
'md5': '8f6f7dd3b1dba4d835ba990e25f31243',
|
||||
'info_dict': {
|
||||
'id': '1643812',
|
||||
'ext': 'mp4',
|
||||
'format_id': 'mp4',
|
||||
'title': 'ΟΙ ΠΡΟΔΟΤΕΣ – ΕΠΕΙΣΟΔΙΟ 01',
|
||||
'thumbnail': r're:https://ant1media\.azureedge\.net/imgHandler/\d+/b3d63096-e72d-43c4-87a0-00d4363d242f\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
|
@ -52,11 +66,12 @@ class Ant1NewsGrWatchIE(Ant1NewsGrBaseIE):
|
|||
video_id, netloc = self._match_valid_url(url).group('id', 'netloc')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
info = self._download_and_extract_api_data(video_id, netloc)
|
||||
info['description'] = self._og_search_description(webpage)
|
||||
info['description'] = self._og_search_description(webpage, default=None)
|
||||
info['_old_archive_ids'] = [make_archive_id('Ant1NewsGrWatch', video_id)],
|
||||
return info
|
||||
|
||||
|
||||
class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
||||
class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
IE_NAME = 'ant1newsgr:article'
|
||||
IE_DESC = 'ant1news.gr articles'
|
||||
_VALID_URL = r'https?://(?:www\.)?ant1news\.gr/[^/]+/article/(?P<id>\d+)/'
|
||||
|
@ -96,7 +111,7 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
|
|||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
|
||||
class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
||||
class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
||||
IE_NAME = 'ant1newsgr:embed'
|
||||
IE_DESC = 'ant1news.gr embedded videos'
|
||||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
|
@ -121,7 +136,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
|
|||
canonical_url = self._request_webpage(
|
||||
HEADRequest(url), video_id,
|
||||
note='Resolve canonical player URL',
|
||||
errnote='Could not resolve canonical player URL').geturl()
|
||||
errnote='Could not resolve canonical player URL').url
|
||||
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
|
||||
cid = urllib.parse.parse_qs(query)['cid'][0]
|
||||
|
|
@ -1,16 +1,15 @@
|
|||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .naver import NaverBaseIE
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
dict_get,
|
||||
|
@ -899,7 +898,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 404:
|
||||
self.raise_no_formats(
|
||||
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
|
||||
else:
|
||||
|
@ -926,7 +925,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.geturl())
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
|
@ -947,237 +946,3 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
if not info.get('title'):
|
||||
info['title'] = video_id
|
||||
return info
|
||||
|
||||
|
||||
class VLiveWebArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'web.archive:vlive'
|
||||
IE_DESC = 'web.archive.org saved vlive videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://)?web\.archive\.org/
|
||||
(?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)? # /web and the version index is optional
|
||||
(?:https?(?::|%3[Aa])//)?(?:
|
||||
(?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
|
||||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
|
||||
'md5': 'cc7314812855ce56de70a06a27314983',
|
||||
'info_dict': {
|
||||
'id': '1326',
|
||||
'ext': 'mp4',
|
||||
'title': "Girl's Day's Broadcast",
|
||||
'creator': "Girl's Day",
|
||||
'view_count': int,
|
||||
'uploader_id': 'muploader_a',
|
||||
'uploader_url': None,
|
||||
'uploader': None,
|
||||
'upload_date': '20150817',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1439816449,
|
||||
'like_count': int,
|
||||
'channel': 'Girl\'s Day',
|
||||
'channel_id': 'FDF27',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1439818140,
|
||||
'release_date': '20150817',
|
||||
'duration': 1014,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
|
||||
'info_dict': {
|
||||
'id': '16937',
|
||||
'ext': 'mp4',
|
||||
'title': '첸백시 걍방',
|
||||
'creator': 'EXO',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:12',
|
||||
'uploader_id': 'muploader_j',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20161112',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1478923074,
|
||||
'like_count': int,
|
||||
'channel': 'EXO',
|
||||
'channel_id': 'F94BD',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1478924280,
|
||||
'release_date': '20161112',
|
||||
'duration': 906,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
|
||||
'info_dict': {
|
||||
'id': '101870',
|
||||
'ext': 'mp4',
|
||||
'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
|
||||
'creator': 'Dispatch',
|
||||
'view_count': int,
|
||||
'subtitles': 'mincount:6',
|
||||
'uploader_id': 'V__FRA08071',
|
||||
'uploader_url': 'http://vlive.tv',
|
||||
'uploader': None,
|
||||
'upload_date': '20181130',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'timestamp': 1543601327,
|
||||
'like_count': int,
|
||||
'channel': 'Dispatch',
|
||||
'channel_id': 'C796F3',
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1543601040,
|
||||
'release_date': '20181130',
|
||||
'duration': 279,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
# The wayback machine has special timestamp and "mode" values:
|
||||
# timestamp:
|
||||
# 1 = the first capture
|
||||
# 2 = the last capture
|
||||
# mode:
|
||||
# id_ = Identity - perform no alterations of the original resource, return it as it was archived.
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
|
||||
|
||||
def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
    """Download the unaltered (``id_`` mode) Wayback Machine capture of *url*.

    ``timestamp`` selects the capture. A falsy value (for example the ``None``
    obtained when the input URL carries no capture date) falls back to ``'2'``,
    the Wayback Machine alias for the last capture. All other keyword
    arguments are forwarded to ``_download_webpage``.

    Raises an expected ExtractorError when the archive answers with HTTP 404;
    any other extraction error is handed to the retry manager.
    """
    # Without this fallback a missing capture date would be formatted into
    # the request URL as the literal text "None".
    timestamp = timestamp or '2'
    archive_url = f'https://web.archive.org/web/{timestamp}id_/{url}'
    for retry in self.RetryManager():
        try:
            return self._download_webpage(archive_url, video_id, **kwargs)
        except ExtractorError as e:
            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
                raise ExtractorError('Page was not archived', expected=True)
            retry.error = e
|
||||
|
||||
def _download_archived_json(self, url, video_id, **kwargs):
    """Fetch an archived page and parse it as JSON.

    Keyword arguments are passed through to ``_download_archived_page``.
    Raises an expected ExtractorError when no page content was returned.
    """
    archived = self._download_archived_page(url, video_id, **kwargs)
    if archived:
        return self._parse_json(archived, video_id)
    raise ExtractorError('Page was not archived', expected=True)
|
||||
|
||||
def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
    """Build formats from an archived m3u8 playlist.

    The playlist is downloaded from the archive, every URI line is rewritten
    to point at the archive (with ``params`` appended as query string), and
    the first rewritten URI is probed with a HEAD request.

    Returns the parsed formats, or None when the playlist is unavailable,
    contains no URI lines, or its first entry was not archived.
    """
    m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
    if not m3u8_doc:
        return

    # M3U8 document should be changed to archive domain
    m3u8_doc = m3u8_doc.splitlines()
    url_base = m3u8_url.rsplit('/', 1)[0]
    query_string = urllib.parse.urlencode(params)  # invariant across lines
    first_segment = None
    for i, line in enumerate(m3u8_doc):
        # Blank lines carry no URI; rewriting them would inject bogus entries
        if not line.strip() or line.startswith('#'):
            continue
        m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{query_string}'
        first_segment = first_segment or m3u8_doc[i]

    if not first_segment:
        # Nothing to probe: the playlist has no URI lines at all
        return

    # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
    urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
                                 fatal=False, note='Check first segment availablity')
    if urlh:
        formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
        if subtitles:
            self._report_ignoring_subs('m3u8')
        return formats
|
||||
|
||||
# Closely follows the logic of the ArchiveTeam grab script
|
||||
# See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
|
||||
def _real_extract(self, url):
    """Extract an archived VLive video from the Wayback Machine.

    Steps: load the archived video page, read its preloaded player state,
    fetch the archived main script for the app id, fetch the "inkey" and
    the VOD data, then collect HLS formats (first stream only) and, when
    archived, the highest-bitrate MP4 file.
    """
    video_id, url_date = self._match_valid_url(url).group('id', 'date')

    # The capture date is optional in the URL; '2' requests the last capture
    webpage = self._download_archived_page(
        f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date or '2')

    player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
    user_country = traverse_obj(player_info, ('common', 'userCountry'))

    main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
    main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
    app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')

    inkey = self._download_archived_json(
        f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
            'appId': app_id,
            'platformType': 'PC',
            'gcc': user_country,
            'locale': 'en_US',
        }, fatal=False)

    vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))

    vod_data = self._download_archived_json(
        f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
            'key': inkey.get('inkey'),
            'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
            'sid': '2024',
            'ver': '2.0',
            'devt': 'html5_pc',
            'doct': 'json',
            'ptc': 'https',
            'sptc': 'https',
            'cpt': 'vtt',
            'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
            'pv': '4.26.9',
            'dr': '1920x1080',
            'cpl': 'en_US',
            'lc': 'en_US',
            'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
            'adu': '%2F',
            'videoId': vod_id,
            'cc': user_country,
        })

    formats = []

    streams = traverse_obj(vod_data, ('streams', ...))
    if len(streams) > 1:
        self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
    # The VOD data may list no streams at all (MP4-only captures); indexing
    # unconditionally would raise IndexError before the MP4 fallback runs.
    stream = streams[0] if streams else {}

    max_stream = max(
        stream.get('videos') or [],
        key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
    # Skip entries without a playlist URL instead of requesting ".../None"
    if max_stream is not None and max_stream.get('source'):
        params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
        formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []

    # For parts of the project MP4 files were archived
    max_video = max(
        traverse_obj(vod_data, ('videos', 'list', ...)),
        key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
    video_source = max_video.get('source') if max_video is not None else None
    if video_source:  # a missing source would otherwise fail on str + None
        video_url = self._WAYBACK_BASE_URL + video_source
        urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
                                     fatal=False, note='Check video availablity')
        if urlh:
            formats.append({'url': video_url})

    return {
        'id': video_id,
        'formats': formats,
        **traverse_obj(player_info, ('postDetail', 'post', {
            'title': ('officialVideo', 'title', {str}),
            'creator': ('author', 'nickname', {str}),
            'channel': ('channel', 'channelName', {str}),
            'channel_id': ('channel', 'channelCode', {str}),
            'duration': ('officialVideo', 'playTime', {int_or_none}),
            'view_count': ('officialVideo', 'playCount', {int_or_none}),
            'like_count': ('officialVideo', 'likeCount', {int_or_none}),
            'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
            'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
            'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
        })),
        **traverse_obj(vod_data, ('meta', {
            'uploader_id': ('user', 'id', {str}),
            'uploader': ('user', 'name', {str}),
            'uploader_url': ('user', 'url', {url_or_none}),
            'thumbnail': ('cover', 'source', {url_or_none}),
        }), expected_type=lambda x: x or None),
        **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
    }
|
||||
|
|
|
@ -169,7 +169,7 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
)))
|
||||
|
||||
short_label = traverse_obj(stream_version, 'shortLabel', expected_type=str, default='?')
|
||||
if stream['protocol'].startswith('HLS'):
|
||||
if 'HLS' in stream['protocol']:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
stream['url'], video_id=video_id, ext='mp4', m3u8_id=stream_version_code, fatal=False)
|
||||
for fmt in fmts:
|
||||
|
|
|
@ -1,196 +0,0 @@
|
|||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushBaseIE(InfoExtractor):
    """Shared API and metadata helpers for the AsianCrush family of sites."""

    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
    # Fields of a video object that may hold a Kaltura URL
    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    # Per-host suffix appended to the "api" subdomain
    _API_SUFFIX = {'retrocrush.tv': '-ott'}

    def _call_api(self, host, endpoint, video_id, query, resource):
        """Query ``endpoint`` on the host's API and return its ``objects`` entry."""
        suffix = self._API_SUFFIX.get(host, '')
        response = self._download_json(
            f'https://api{suffix}.{host}/{endpoint}', video_id,
            f'Downloading {resource} JSON metadata', query=query,
            headers=self.geo_verification_headers())
        return response['objects']

    def _download_object_data(self, host, object_id, resource):
        """Return the first object the ``search`` endpoint yields for ``object_id``."""
        objects = self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)
        return objects[0]

    def _get_object_description(self, obj):
        """Return the stripped long description, falling back to the short one."""
        text = obj.get('long_description') or obj.get('short_description')
        return strip_or_none(text)

    def _parse_video_data(self, video):
        """Turn an API video object into a url_transparent result for Kaltura."""
        title = video['name']

        # The first field whose URL matches supplies the Kaltura partner/entry ids
        partner_id = entry_id = None
        for key in self._KALTURA_KEYS:
            candidate = video.get(key)
            if not candidate:
                continue
            match = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', candidate)
            if match:
                partner_id, entry_id = match.groups()
                break

        meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
        categories = [c.get('name') for c in meta_categories if c.get('name')]

        show_info = video.get('show_info') or {}
        rating = video.get('mpaa_rating') or video.get('tv_rating')

        return {
            '_type': 'url_transparent',
            'url': f'kaltura:{partner_id}:{entry_id}',
            'ie_key': KalturaIE.ie_key(),
            'id': entry_id,
            'title': title,
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(rating),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
    """Single-video extractor for the AsianCrush family of sites."""

    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
            'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
            'age_limit': 13,
            'categories': 'count:5',
            'duration': 5812,
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the video up through the API and hand it over to Kaltura."""
        host, video_id = self._match_valid_url(url).group('host', 'id')

        if host == 'cocoro.tv':
            # On cocoro.tv the id used for the lookup is taken from the page's
            # embed variables when present, otherwise the URL id is kept
            webpage = self._download_webpage(url, video_id)
            raw_vars = self._search_regex(
                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
                default='{}')
            embed_vars = self._parse_json(raw_vars, video_id, fatal=False) or {}
            video_id = embed_vars.get('entry_id') or video_id

        return self._parse_video_data(
            self._download_object_data(host, video_id, 'video'))
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
    """Series (playlist) extractor for the AsianCrush family of sites."""

    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
        'info_dict': {
            'id': '6447',
            'title': 'Fruity Samurai',
            'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
        'only_matching': True,
    }]
    _PAGE_SIZE = 1000000000

    def _fetch_page(self, domain, parent_id, page):
        """Yield parsed video entries for one API page of a series."""
        videos = self._call_api(
            domain, 'getreferencedobjects', parent_id, {
                'max': self._PAGE_SIZE,
                'object_type': 'video',
                'parent_id': parent_id,
                'start': page * self._PAGE_SIZE,
            }, 'page %d' % (page + 1))
        for video in videos:
            yield self._parse_video_data(video)

    def _real_extract(self, url):
        """Return the playlist for a series URL.

        cocoro.tv series are scraped from the web page; every other host is
        served from the API through a lazily paged list.
        """
        host, playlist_id = self._match_valid_url(url).groups()

        if host == 'cocoro.tv':
            webpage = self._download_webpage(url, playlist_id)

            entries = []

            for mobj in re.finditer(
                    r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                    webpage):
                attrs = extract_attributes(mobj.group(0))
                if attrs.get('class') == 'clearfix':
                    entries.append(self.url_result(
                        mobj.group('url'), ie=AsianCrushIE.ie_key()))

            # `[^>]+` (previously a bare `[^>]`) lets the heading carry other
            # attributes before `id`; the old pattern only matched when exactly
            # one character separated `<h1` from `id=`.
            title = self._html_search_regex(
                r'(?s)<h1\b[^>]+\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
                'title', default=None) or self._og_search_title(
                webpage, default=None) or self._html_search_meta(
                'twitter:title', webpage, 'title',
                default=None) or self._html_extract_title(webpage)
            if title:
                # Drop everything from the first "|" separator onwards
                title = re.sub(r'\s*\|\s*.+?$', '', title)

            description = self._og_search_description(
                webpage, default=None) or self._html_search_meta(
                'twitter:description', webpage, 'description', fatal=False)
        else:
            show = self._download_object_data(host, playlist_id, 'show')
            title = show.get('name')
            description = self._get_object_description(show)
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, host, playlist_id),
                self._PAGE_SIZE)

        return self.playlist_result(entries, playlist_id, title, description)
|
|
@ -1,5 +1,5 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
|
@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
|
|||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _handle_error(self, e, code):
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
|
||||
error = self._parse_json(e.cause.read(), None)
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == code:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
if error.get('error') == 'required_registered':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
|
|
|
@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
|||
|
||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||
query = query or {}
|
||||
amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
|
||||
amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||
date = amz_date[:8]
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
|
|
87
yt_dlp/extractor/axs.py
Normal file
87
yt_dlp/extractor/axs.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AxsIE(InfoExtractor):
    """Extractor for axs.tv video pages."""

    IE_NAME = 'axs.tv'
    _VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
        'md5': '8d97736ae8e50c64df528e5e676778cf',
        'info_dict': {
            'id': '5f4dc776b70e4f1c194f22ef',
            'title': 'Small Town',
            'ext': 'mp4',
            'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
            'upload_date': '20230602',
            'timestamp': 1685729564,
            'duration': 1284.216,
            'series': 'Rock & Roll Road Trip with Sammy Hagar',
            'season': 2,
            'episode': '3',
            'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
        },
    }, {
        'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
        'md5': '300ae795cd8f9984652c0949734ffbdc',
        'info_dict': {
            'id': '5f488148b70e4f392572977c',
            'display_id': 'daryl-hall',
            'title': 'Daryl Hall',
            'ext': 'mp4',
            'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
            'upload_date': '20230214',
            'timestamp': 1676403615,
            'duration': 2570.668,
            'series': 'The Big Interview with Dan Rather',
            'season': 3,
            'episode': '5',
            'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page's video/company ids, then query the player API."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page embeds a JS object literal holding the ids needed by the API
        mount_obj = self._search_json(
            r'mountObj\s*=', webpage, 'video ID data', display_id,
            transform_source=js_to_json)
        video_id, company_id = mount_obj['video_id'], mount_obj['company_id']

        api_response = self._download_json(
            f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
            video_id, query={'device_type': 'desktop_web'})
        meta = api_response['video']

        formats = self._extract_m3u8_formats(
            meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')

        # Only captions with a valid URL are kept; language defaults to 'en'
        subtitles = {}
        captions = traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath'])))
        for caption in captions:
            lang = caption.get('srtShortLang') or 'en'
            subtitles.setdefault(lang, []).append(
                {'ext': caption.get('srtExt'), 'url': caption['srtPath']})

        info = {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
        }
        info.update(traverse_obj(meta, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'series': ('seriestitle', {str}),
            'season': ('season', {int}),
            'episode': ('episode', {str}),
            'duration': ('duration', {float_or_none}),
            'timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('thumb', {url_or_none}),
        }))
        info['subtitles'] = subtitles
        return info
|
|
@ -31,7 +31,7 @@ class BanByeBaseIE(InfoExtractor):
|
|||
|
||||
|
||||
class BanByeIE(BanByeBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
|
||||
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
|
||||
|
@ -59,7 +59,27 @@ class BanByeIE(BanByeBaseIE):
|
|||
'title': 'Krzysztof Karoń',
|
||||
'id': 'p_Ld82N6gBw_OJ',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
'playlist_mincount': 9,
|
||||
}, {
|
||||
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
|
||||
'info_dict': {
|
||||
'id': 'v_kb6_o1Kyq-CD',
|
||||
'ext': 'mp4',
|
||||
'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
|
||||
'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
|
||||
'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
|
||||
'channel_id': 'ch_QgWnHvDG2fo5',
|
||||
'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
|
||||
'duration': 597,
|
||||
'timestamp': 1688642656,
|
||||
'upload_date': '20230706',
|
||||
'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
|
||||
'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -2,11 +2,11 @@ import functools
|
|||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.error
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError, compat_str, compat_urlparse
|
||||
from ..compat import compat_str, compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
|
@ -15,11 +15,13 @@ from ..utils import (
|
|||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
|
@ -41,7 +43,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
|
||||
music/(?:clips|audiovideo/popular)[/#]|
|
||||
radio/player/|
|
||||
sounds/play/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
|
@ -218,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
|
||||
'note': 'Audio',
|
||||
'info_dict': {
|
||||
'id': 'm0007jz9',
|
||||
'ext': 'mp4',
|
||||
'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
|
||||
'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
|
||||
'duration': 9840,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
|
||||
'only_matching': True,
|
||||
|
@ -277,7 +264,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||
post_url, None, 'Logging in', data=urlencode_postdata(login_form),
|
||||
headers={'Referer': self._LOGIN_URL})
|
||||
|
||||
if self._LOGIN_URL in urlh.geturl():
|
||||
if self._LOGIN_URL in urlh.url:
|
||||
error = clean_html(get_element_by_class('form-message', response))
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
|
@ -388,8 +375,8 @@ class BBCCoUkIE(InfoExtractor):
|
|||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id=format_id, fatal=False)
|
||||
except ExtractorError as e:
|
||||
if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
|
||||
and e.exc_info[1].code in (403, 404)):
|
||||
if not (isinstance(e.exc_info[1], HTTPError)
|
||||
and e.exc_info[1].status in (403, 404)):
|
||||
raise
|
||||
fmts = []
|
||||
formats.extend(fmts)
|
||||
|
@ -472,7 +459,7 @@ class BBCCoUkIE(InfoExtractor):
|
|||
|
||||
return programme_id, title, description, duration, formats, subtitles
|
||||
except ExtractorError as ee:
|
||||
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
|
||||
if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
|
||||
raise
|
||||
|
||||
# fallback to legacy playlist
|
||||
|
@ -844,6 +831,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
|
@ -983,7 +984,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
# Some playlist URL may fail with 500, at the same time
|
||||
# the other one may work fine (e.g.
|
||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 500:
|
||||
continue
|
||||
raise
|
||||
if entry:
|
||||
|
@ -1128,6 +1129,13 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
}
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
@ -8,7 +9,8 @@ from ..utils import (
|
|||
class BildIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
|
||||
IE_DESC = 'Bild.de'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'note': 'static MP4 only',
|
||||
'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
|
||||
'md5': 'dd495cbd99f2413502a1713a1156ac8a',
|
||||
'info_dict': {
|
||||
|
@ -19,7 +21,19 @@ class BildIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'note': 'static MP4 and HLS',
|
||||
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
|
||||
'md5': 'fb0ed4f09c495d4ba7ce2eee0bb90de1',
|
||||
'info_dict': {
|
||||
'id': '85158620',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Sprungturm-Skandal',
|
||||
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 69,
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
@ -27,11 +41,23 @@ class BildIE(InfoExtractor):
|
|||
video_data = self._download_json(
|
||||
url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
|
||||
|
||||
formats = []
|
||||
for src in traverse_obj(video_data, ('clipList', 0, 'srces', lambda _, v: v['src'])):
|
||||
src_type = src.get('type')
|
||||
if src_type == 'application/x-mpegURL':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(
|
||||
src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
elif src_type == 'video/mp4':
|
||||
formats.append({'url': src['src'], 'format_id': 'http-mp4'})
|
||||
else:
|
||||
self.report_warning(f'Skipping unsupported format type: "{src_type}"')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': unescapeHTML(video_data['title']).strip(),
|
||||
'description': unescapeHTML(video_data.get('description')),
|
||||
'url': video_data['clipList'][0]['srces'][0]['src'],
|
||||
'formats': formats,
|
||||
'thumbnail': video_data.get('poster'),
|
||||
'duration': int_or_none(video_data.get('durationSec')),
|
||||
}
|
||||
|
|
|
@ -3,21 +3,24 @@ import functools
|
|||
import hashlib
|
||||
import itertools
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..dependencies import Cryptodome
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
|
@ -33,27 +36,31 @@ from ..utils import (
|
|||
unsmuggle_url,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
class BilibiliBaseIE(InfoExtractor):
|
||||
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
format_names = {
|
||||
r['quality']: traverse_obj(r, 'new_description', 'display_desc')
|
||||
for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
|
||||
}
|
||||
|
||||
audios = traverse_obj(play_info, ('dash', 'audio', ...))
|
||||
audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
|
||||
flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
|
||||
if flac_audio:
|
||||
audios.append(flac_audio)
|
||||
formats = [{
|
||||
'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
|
||||
'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
|
||||
'acodec': audio.get('codecs'),
|
||||
'acodec': traverse_obj(audio, ('codecs', {str.lower})),
|
||||
'vcodec': 'none',
|
||||
'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(audio.get('size'))
|
||||
'filesize': int_or_none(audio.get('size')),
|
||||
'format_id': str_or_none(audio.get('id')),
|
||||
} for audio in audios]
|
||||
|
||||
formats.extend({
|
||||
|
@ -64,9 +71,13 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
'height': int_or_none(video.get('height')),
|
||||
'vcodec': video.get('codecs'),
|
||||
'acodec': 'none' if audios else None,
|
||||
'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
|
||||
'tbr': float_or_none(video.get('bandwidth'), scale=1000),
|
||||
'filesize': int_or_none(video.get('size')),
|
||||
'quality': int_or_none(video.get('id')),
|
||||
'format_id': traverse_obj(
|
||||
video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
|
||||
('id', {str_or_none}), get_all=False),
|
||||
'format': format_names.get(video.get('id')),
|
||||
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
|
||||
|
||||
|
@ -135,9 +146,20 @@ class BilibiliBaseIE(InfoExtractor):
|
|||
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
|
||||
yield from children
|
||||
|
||||
def _get_episodes_from_season(self, ss_id, url):
    """Yield one url_result per episode in the main section of a season.

    ss_id is sent as the ``season_id`` query parameter; url is sent as the
    Referer header together with the geo-verification headers.
    Episodes without a valid ``share_url`` or without an ``id`` are skipped.
    """
    request_headers = {'Referer': url, **self.geo_verification_headers()}
    season_info = self._download_json(
        'https://api.bilibili.com/pgc/web/season/section', ss_id,
        note='Downloading season info', query={'season_id': ss_id},
        headers=request_headers)

    def has_url_and_id(_, episode):
        # Both fields are needed below to build the result
        return url_or_none(episode['share_url']) and episode['id']

    episodes = traverse_obj(
        season_info, ('result', 'main_section', 'episodes', has_url_and_id))
    for episode in episodes:
        yield self.url_result(episode['share_url'], BiliBiliBangumiIE, f'ep{episode["id"]}')
|
||||
|
||||
|
||||
class BiliBiliIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||
|
@ -233,7 +255,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
|
||||
'duration': 313.557,
|
||||
'upload_date': '20220709',
|
||||
'uploader': '小夫Tech',
|
||||
'uploader': '小夫太渴',
|
||||
'timestamp': 1657347907,
|
||||
'uploader_id': '1326814124',
|
||||
'comment_count': int,
|
||||
|
@ -403,77 +425,94 @@ class BiliBiliIE(BilibiliBaseIE):
|
|||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss897',
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||
'info_dict': {
|
||||
'id': 'ss897',
|
||||
'id': '267851',
|
||||
'ext': 'mp4',
|
||||
'series': '神的记事本',
|
||||
'season': '神的记事本',
|
||||
'season_id': 897,
|
||||
'series': '鬼灭之刃',
|
||||
'series_id': '4358',
|
||||
'season': '鬼灭之刃',
|
||||
'season_id': '26801',
|
||||
'season_number': 1,
|
||||
'episode': '你与旅行包',
|
||||
'episode_number': 2,
|
||||
'title': '神的记事本:第2话 你与旅行包',
|
||||
'duration': 1428.487,
|
||||
'timestamp': 1310809380,
|
||||
'upload_date': '20110716',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'episode': '残酷',
|
||||
'episode_id': '267851',
|
||||
'episode_number': 1,
|
||||
'title': '1 残酷',
|
||||
'duration': 1425.256,
|
||||
'timestamp': 1554566400,
|
||||
'upload_date': '20190406',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
|
||||
'only_matching': True,
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
episode_id = video_id[2:]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage
|
||||
or '正在观看预览,大会员免费看全片' in webpage):
|
||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
formats = self.extract_formats(play_info)
|
||||
if (not formats and '成为大会员抢先看' in webpage
|
||||
and play_info.get('durl') and not play_info.get('dash')):
|
||||
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
bangumi_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
|
||||
query={'ep_id': episode_id}, headers=headers)['result']
|
||||
|
||||
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
|
||||
episode_number, episode_info = next((
|
||||
(idx, ep) for idx, ep in enumerate(traverse_obj(
|
||||
bangumi_info, ('episodes', ..., {dict})), 1)
|
||||
if str_or_none(ep.get('id')) == episode_id), (1, {}))
|
||||
|
||||
season_id = bangumi_info.get('season_id')
|
||||
season_number = season_id and next((
|
||||
idx + 1 for idx, e in enumerate(
|
||||
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...)))
|
||||
traverse_obj(bangumi_info, ('seasons', ...)))
|
||||
if e.get('season_id') == season_id
|
||||
), None)
|
||||
|
||||
aid = episode_info.get('aid')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': traverse_obj(initial_state, 'h1Title'),
|
||||
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
|
||||
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
|
||||
'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
|
||||
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
|
||||
'season_id': season_id,
|
||||
**traverse_obj(bangumi_info, {
|
||||
'series': ('series', 'series_title', {str}),
|
||||
'series_id': ('series', 'series_id', {str_or_none}),
|
||||
'thumbnail': ('square_cover', {url_or_none}),
|
||||
}),
|
||||
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
|
||||
'episode': episode_info.get('long_title'),
|
||||
'episode_id': episode_id,
|
||||
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
|
||||
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
|
||||
'timestamp': int_or_none(episode_info.get('pub_time')),
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(
|
||||
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))),
|
||||
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))),
|
||||
'http_headers': {'Referer': url, **self.geo_verification_headers()},
|
||||
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
|
||||
'info_dict': {
|
||||
|
@ -485,16 +524,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
media_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
ss_id = self._search_json(
|
||||
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
|
||||
episode_list = self._download_json(
|
||||
'https://api.bilibili.com/pgc/web/season/section', media_id,
|
||||
query={'season_id': initial_state['mediaInfo']['season_id']},
|
||||
note='Downloading season info')['result']['main_section']['episodes']
|
||||
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
|
||||
|
||||
return self.playlist_result((
|
||||
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid'])
|
||||
for entry in episode_list), media_id)
|
||||
|
||||
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    # Playlist extractor for bangumi season pages (.../bangumi/play/ss<digits>)
    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801'
        },
        'playlist_mincount': 26
    }]

    def _real_extract(self, url):
        """Return a playlist of the season's episodes, keyed by the numeric season id."""
        season_id = self._match_id(url)
        episodes = self._get_episodes_from_season(season_id, url)
        return self.playlist_result(episodes, season_id)
|
||||
|
||||
|
||||
class BilibiliSpaceBaseIE(InfoExtractor):
|
||||
|
@ -575,7 +624,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
|||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
|
@ -633,13 +682,35 @@ class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
|
|||
return self.playlist_result(paged_list, playlist_id)
|
||||
|
||||
|
||||
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
||||
_VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
|
||||
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    # Helpers shared by the list-style extractors that subclass this base

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every video id found in page_data.

        bvid_keys is a single key or a tuple of keys leading to the list of
        items; ending_key is the field of each item that holds the id.
        Only string ids are kept.
        """
        id_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, id_path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Return the uploader name scraped from the user's space page title, or None.

        The lookup is best-effort: neither a failed download nor a missing
        title aborts the extraction.
        """
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # With fatal=False a failed download does not return a string;
            # searching it would raise TypeError instead of yielding "no uploader"
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Delegate to the parent implementation, then drop the paging keys.

        'page_count' and 'page_size' are removed from the returned metadata
        (presumably so the rest can be passed on as playlist fields).
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
|
||||
|
||||
|
||||
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
_VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
|
||||
'info_dict': {
|
||||
'id': '2142762_57445',
|
||||
'title': '《底特律 变人》'
|
||||
'title': '【完结】《底特律 变人》全结局流程解说',
|
||||
'description': '',
|
||||
'uploader': '老戴在此',
|
||||
'uploader_id': '2142762',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
|
||||
},
|
||||
'playlist_mincount': 31,
|
||||
}]
|
||||
|
@ -660,22 +731,251 @@ class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
|
|||
return {
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'title': traverse_obj(page_data, ('meta', 'name'))
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**traverse_obj(page_data, {
|
||||
'title': ('meta', 'name', {str}),
|
||||
'description': ('meta', 'description', {str}),
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in page_data.get('archives', []):
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
|
||||
BiliBiliIE, entry['bvid'])
|
||||
return self._get_entries(page_data, 'archives')
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id, metadata['title'])
|
||||
return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for space.bilibili.com/<mid>/channel/seriesdetail?sid=<sid>
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Build a paged playlist for a series; the playlist id is '<mid>_<sid>'."""
        url_match = self._match_valid_url(url)
        mid, sid = url_match.group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series-level metadata is optional, hence fatal=False
        series_info = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
        )
        playlist_meta = traverse_obj(series_info, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            # page_idx is zero-based; the API's 'pn' parameter is one-based
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            total_entries = page_data['page']['total']
            metadata = {
                'page_count': math.ceil(total_entries / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
            }
            metadata.update(playlist_meta)
            return metadata

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
|
||||
|
||||
|
||||
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for favorites lists (favlist?fid=<id> and medialist/detail/ml<id>)
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist.

        Raises a login-required error when the metadata response carries
        code -403.
        """
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The ids endpoint is fetched in a single request
        id_listing = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(id_listing, 'data')

        playlist_fields = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_fields)
|
||||
|
||||
|
||||
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for the logged-in user's watch-later page
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later list as a playlist.

        The playlist id is the value of the 'DedeUserID' cookie when present,
        otherwise the literal 'watchlater'. Raises a login-required error when
        the API answers with code -101.
        """
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        entries = self._get_entries(watchlater_info, ('data', 'list'))
        return self.playlist_result(entries, id=list_id, title='稍后再看')
|
||||
|
||||
|
||||
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for bilibili.com/list/<id> and bilibili.com/medialist/play/<id>
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield entries page by page until the API stops reporting 'has_more'.

        query is updated in place: after each page its 'oid' is set to the id
        of the last item of that page before the next request is made.
        """
        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                return

    def _real_extract(self, url):
        """Return the media list as a playlist whose id is '<type>_<biz_id>'.

        Raises ExtractorError (or a login-required error for codes -400 on
        'watchlater' and -403) when the page's initial state does not carry
        error code 200.
        """
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        status_code = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status_code != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        # Request parameters for the medialist API, taken from the page state
        state_params = traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        })
        query = {
            'ps': 20,
            'with_current': False,
            **state_params
        }

        list_fields = traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        }))
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **list_fields,
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
|
||||
|
||||
|
||||
class BilibiliCategoryIE(InfoExtractor):
|
||||
IE_NAME = 'Bilibili category extractor'
|
||||
_MAX_RESULTS = 1000000
|
||||
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||
'info_dict': {
|
||||
|
@ -1360,7 +1660,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
|||
|
||||
|
||||
class BiliLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://live.bilibili.com/196',
|
||||
|
|
|
@ -2,9 +2,9 @@ import functools
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
|
|
|
@ -1,56 +1,170 @@
|
|||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
    """Build a one-argument function that picks the first matching element from HTML.

    When cls is given, elements are looked up by class (optionally limited to
    tag); otherwise the lookup is by tag alone. At least one of the two must
    be provided.
    """
    assert tag or cls, 'One of tag or class is required'

    finder = (
        functools.partial(get_elements_by_class, cls, tag=tag) if cls
        else functools.partial(get_element_text_and_html_by_tag, tag))

    def html_get_element_wrapper(html):
        # variadic() wraps a non-list result, so [0] picks the first item either way
        found = variadic(finder(html))
        return found[0]

    return html_get_element_wrapper
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
|
||||
|
||||
def _parse_vue_attributes(self, name, string, video_id):
    """Return the attributes of the first ``<name ...>`` opening tag in *string*.

    Attributes whose key starts with ``:`` (presumably Vue-bound props, going
    by the method name) are decoded from JavaScript literals into Python
    objects; a value that fails to parse is handled non-fatally by
    ``_parse_json``. All other attributes are returned as the raw strings.
    """
    opening_tag = self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name)
    raw_attributes = extract_attributes(opening_tag)

    def decode(attr_name, attr_value):
        # Only ':'-prefixed attributes carry JS expressions
        if not attr_name.startswith(':'):
            return attr_value
        return self._parse_json(attr_value, video_id, transform_source=js_to_json, fatal=False)

    return {
        attr_name: decode(attr_name, attr_value)
        for attr_name, attr_value in raw_attributes.items()
    }
|
||||
|
||||
@staticmethod
def _process_source(source):
    """Convert one player source entry into a format dict.

    Returns ``None`` when the entry's ``src`` is not a usable URL.
    For video sources the format note is the text after the last ``_``
    in the URL with its final extension stripped; sources whose type does
    not start with ``video`` are marked as having no video codec.
    """
    media_url = url_or_none(source['src'])
    if media_url:
        mime_type = source.get('type', '')
        ext = mimetype2ext(mime_type)

        if mime_type.startswith('video'):
            stem = media_url.rpartition('.')[0]
            label = stem.rpartition('_')[2]
            vcodec = None
        else:
            label = None
            vcodec = 'none'

        return {
            'url': media_url,
            'ext': ext,
            'vcodec': vcodec,
            'quality': 10 if label == 'high' else 0,
            'format_note': label,
            'format_id': join_nonempty(ext, label),
        }

    return None
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Extract metadata and formats from a bpb.de media page.

    The superseded implementation (fatal ``<h2 class="white">`` title regex
    and the ``film.bpb.de`` source scraping) has been dropped: it was
    interleaved with the current code, produced duplicate ``title`` /
    ``description`` / ``formats`` keys, and its fatal title lookup would
    raise before the current extraction could run.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # _TITLE_RE splits the header element into the text before the first
    # nested tag ("title") and the text after it ("series")
    title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
    json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))

    return {
        'id': video_id,
        'title': traverse_obj(title_result, ('title', {str.strip})) or None,
        # This metadata could be interpreted otherwise, but it fits "series" the most
        'series': traverse_obj(title_result, ('series', {str.strip})) or None,
        'description': join_nonempty(*traverse_obj(webpage, [(
            {html_get_element(cls='opening-intro')},
            [{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
        ), {clean_html}]), delim='\n\n') or None,
        'creator': self._html_search_meta('author', webpage),
        'uploader': self._html_search_meta('publisher', webpage),
        'release_date': unified_strdate(self._html_search_meta('date', webpage)),
        'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
        # Formats and thumbnail come from the <bpb-player> element's attributes
        **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
            'formats': (':sources', ..., {self._process_source}),
            'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
        }),
    }
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .adobepass import AdobePassIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_class,
|
||||
|
@ -155,7 +155,7 @@ class BravoTVIE(AdobePassIE):
|
|||
chapters = None
|
||||
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').geturl()
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
|
||||
if 'mpeg_cenc' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
|
|
@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
|
|||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTTPError,
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
dict_get,
|
||||
|
@ -915,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
|||
json_data = self._download_json(api_url, video_id, headers=headers)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
|
||||
message = json_data.get('message') or json_data['error_code']
|
||||
if json_data.get('error_subcode') == 'CLIENT_GEO':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
|
|
127
yt_dlp/extractor/brilliantpala.py
Normal file
127
yt_dlp/extractor/brilliantpala.py
Normal file
|
@ -0,0 +1,127 @@
|
|||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BrilliantpalaBaseIE(InfoExtractor):
    """Shared login and extraction logic for the brilliantpala.org course sites.

    Subclasses are expected to set ``_DOMAIN`` to a concrete host by filling
    in the ``{subdomain}`` placeholder.
    """
    _NETRC_MACHINE = 'brilliantpala'
    _DOMAIN = '{subdomain}.brilliantpala.org'

    def _initialize_pre_login(self):
        """Derive the per-site endpoint URLs from ``_DOMAIN``."""
        self._HOMEPAGE = f'https://{self._DOMAIN}'
        self._LOGIN_API = f'{self._HOMEPAGE}/login/'
        self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
        # Doubled braces leave a {content_id} placeholder to be filled in per video
        self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
        self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'

    def _get_logged_in_username(self, url, video_id):
        """Return the username embedded in the content page.

        Raises a login-required error when the request was redirected to the
        login page, and an extraction error when no username is present.
        """
        webpage, urlh = self._download_webpage_handle(url, video_id)
        # Prefix match so a login redirect carrying a query string is also detected
        if urlh.url.startswith(self._LOGIN_API):
            self.raise_login_required()
        # `group` must be passed by keyword: the 4th positional parameter is
        # `default`, which would turn a failed match into the literal 'username'
        return self._html_search_regex(
            r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', group='username')

    def _perform_login(self, username, password):
        """Submit the login form and, if offered, log out other devices."""
        login_form = self._hidden_inputs(self._download_webpage(
            self._LOGIN_API, None, 'Downloading login page'))
        login_form.update({
            'username': username,
            'password': password,
        })
        # Send the form's CSRF token as a cookie as well
        self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])

        logged_page = self._download_webpage(
            self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
            data=urlencode_postdata(login_form))

        if self._html_search_regex(
                r'(Your username / email and password)', logged_page, 'auth fail', default=None):
            raise ExtractorError('wrong username or password', expected=True)

        # the maximum number of logins is one
        if self._html_search_regex(
                r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
            logout_device_form = self._hidden_inputs(logged_page)
            self._download_webpage(
                self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
                note='Logging out other devices', data=urlencode_postdata(logout_device_form))

    def _real_extract(self, url):
        """Return a playlist with one entry per HLS stream of the content."""
        course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
        video_id = f'{course_id}-{content_id}'

        username = self._get_logged_in_username(url, video_id)

        content_json = self._download_json(
            self._CONTENT_API.format(content_id=content_id), video_id,
            note='Fetching content info', errnote='Unable to fetch content info')

        entries = []
        for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
            formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
            if not formats:
                continue
            entries.append({
                'id': str(stream['id']),
                'title': content_json.get('title'),
                'formats': formats,
                'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
                # The X-Key header sent with downloads is the SHA-256 hex digest of the username
                'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
                'thumbnail': content_json.get('cover_image'),
            })

        return self.playlist_result(
            entries, playlist_id=video_id, playlist_title=content_json.get('title'))
|
||||
|
||||
|
||||
class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
    # Extractor for the "elearn" subdomain; all logic lives in the base class
    IE_NAME = 'Brilliantpala:Elearn'
    IE_DESC = 'VoD on elearn.brilliantpala.org'

    # Concrete host obtained by filling the base class's {subdomain} placeholder
    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')

    _VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
    _TESTS = [
        {
            'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
            'only_matching': True,
        },
        {
            'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
            'info_dict': {
                'id': '23577',
                'ext': 'mp4',
                'title': 'Physical World, Units and Measurements - 1',
                'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
                'live_status': 'not_live',
            },
            'params': {
                'skip_download': True,
            },
        },
    ]
|
||||
|
||||
|
||||
class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
    # Extractor for the "classes" subdomain; all logic lives in the base class
    IE_NAME = 'Brilliantpala:Classes'
    IE_DESC = 'VoD on classes.brilliantpala.org'

    # Concrete host obtained by filling the base class's {subdomain} placeholder
    _DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')

    _VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
    _TESTS = [
        {
            'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
            'only_matching': True,
        },
        {
            'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
            'info_dict': {
                'id': '9128',
                'ext': 'mp4',
                'title': 'Motion in a Straight Line - Class 1',
                'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
                'live_status': 'not_live',
            },
            'params': {
                'skip_download': True,
            },
        },
    ]
|
39
yt_dlp/extractor/canal1.py
Normal file
39
yt_dlp/extractor/canal1.py
Normal file
|
@ -0,0 +1,39 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canal1IE(InfoExtractor):
    """Resolve canal1.com.co article pages to the player URL they embed."""
    _VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'

    _TESTS = [
        {
            'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
            'info_dict': {
                'id': '63b39f6b354977084b85ab54',
                'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
                'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
                'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
                'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
                'ext': 'mp4',
            },
        },
        {
            'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
            'info_dict': {
                'id': '63b39e93f5fd223aa32250fb',
                'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
                'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
                'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
                'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
                'ext': 'mp4',
            },
        },
        {
            # Geo-restricted to Colombia
            'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Hand the page's ``embedUrl`` to another extractor, keeping the slug as display id."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # The embed URL is read from the "embedUrl" field in the page source
        embed_url = self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', page, 'embed url')
        return self.url_result(embed_url, display_id=slug, url_transparent=True)
|
|
@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
|
|||
# response = self._request_webpage(
|
||||
# HEADRequest(fmt_url), video_id,
|
||||
# 'Checking if the video is georestricted')
|
||||
# if '/blocage' in response.geturl():
|
||||
# if '/blocage' in response.url:
|
||||
# raise ExtractorError(
|
||||
# 'The video is not available in your country',
|
||||
# expected=True)
|
||||
|
|
136
yt_dlp/extractor/caracoltv.py
Normal file
136
yt_dlp/extractor/caracoltv.py
Normal file
|
@ -0,0 +1,136 @@
|
|||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaracolTvPlayIE(InfoExtractor):
    """Extractor for play.caracoltv.com, backed by the eu-gateway.inmobly.com API."""
    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
    _NETRC_MACHINE = 'caracoltv-play'

    _TESTS = [{
        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
        'info_dict': {
            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
            'title': 'La teoría del promedio',
            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
        'info_dict': {
            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
            'title': 'Ella',
            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
        'info_dict': {
            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
            'title': 'La vuelta al mundo en 80 risas 2022',
            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
        },
        'playlist_count': 17,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
        'only_matching': True,
    }]

    # Token obtained by _perform_login; None until a login has happened
    _USER_TOKEN = None

    def _extract_app_token(self, webpage):
        """Return the base64 ``key:secret`` application token.

        The key and secret are looked up in the site's coreConfig JS file;
        every step is best-effort and falls back to built-in values.
        *webpage* may be falsy (a failed non-fatal download).
        """
        # `or ''` keeps a failed non-fatal download from crashing the regex
        # search with a TypeError; a warning is emitted instead
        config_js_path = self._search_regex(
            r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage or '', 'config js url', fatal=False)

        mediation_config = {}
        if config_js_path:
            config_js = self._download_webpage(
                urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config')
            # Same guard: the config download is non-fatal too
            mediation_config = self._search_json(
                r'mediation\s*:', config_js or '', 'mediation_config', None,
                transform_source=js_to_json, fatal=False)

        key = traverse_obj(
            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
        secret = traverse_obj(
            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

        return base64.b64encode(f'{key}:{secret}'.encode()).decode()

    def _perform_login(self, email, password):
        """Log in with *email*/*password* and store the user token in ``_USER_TOKEN``."""
        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
        app_token = self._extract_app_token(webpage)

        # Step 1: exchange the application token for a bearer token
        bearer_token = self._download_json(
            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
            headers={'Authorization': f'Basic {app_token}'})['token']

        # Step 2: log the user in, using a freshly generated random device id
        self._USER_TOKEN = self._download_json(
            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {bearer_token}',
            }, data=json.dumps({
                'device_data': {
                    'device_id': str(uuid.uuid4()),
                    'device_token': '',
                    'device_type': 'web'
                },
                'login_data': {
                    'enabled': True,
                    'email': email,
                    'password': password,
                }
            }).encode())['user_token']

    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
        """Build the info dict for one API item (a single video or an episode)."""
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')

        return {
            'id': video_data['id'],
            'title': video_data.get('name'),
            'description': video_data.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': traverse_obj(
                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
            'series_id': series_id,
            'season_id': season_id,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(video_data.get('item_order')),
            'is_live': video_data.get('entry_type') == 3,
        }

    def _extract_series_seasons(self, seasons, series_id):
        """Yield an info dict for every episode of every season in *seasons*."""
        for season in seasons:
            api_response = self._download_json(
                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})

            season_number = season.get('order')
            for episode in api_response['items']:
                yield self._extract_video(episode, series_id, season['id'], season_number)

    def _real_extract(self, url):
        """Return a single video, or a playlist of episodes when the item has seasons."""
        series_id = self._match_id(url)

        # No user login happened: fall back to the hard-coded guest account
        if self._USER_TOKEN is None:
            self._perform_login('guest@inmobly.com', 'Test@gus1')

        api_response = self._download_json(
            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]

        if not api_response.get('seasons'):
            return self._extract_video(api_response)

        return self.playlist_result(
            self._extract_series_seasons(api_response['seasons'], series_id),
            series_id, **traverse_obj(api_response, {
                'title': 'name',
                'description': 'description',
            }))
|
|
@ -2,6 +2,7 @@ import re
|
|||
import json
|
||||
import base64
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
|
@ -65,6 +66,7 @@ class CBCIE(InfoExtractor):
|
|||
'uploader': 'CBCC-NEW',
|
||||
'timestamp': 255977160,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# multiple iframes
|
||||
'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
|
||||
|
@ -96,7 +98,7 @@ class CBCIE(InfoExtractor):
|
|||
# multiple CBC.APP.Caffeine.initInstance(...)
|
||||
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
|
||||
'info_dict': {
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
|
||||
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME
|
||||
'id': 'dog-indoor-exercise-winter-1.3928238',
|
||||
'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
|
||||
},
|
||||
|
@ -161,7 +163,7 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'upload_date': '20160210',
|
||||
'uploader': 'CBCC-NEW',
|
||||
},
|
||||
'skip': 'Geo-restricted to Canada',
|
||||
'skip': 'Geo-restricted to Canada and no longer available',
|
||||
}, {
|
||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||
|
@ -174,6 +176,9 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'timestamp': 1425704400,
|
||||
'upload_date': '20150307',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||
'chapters': [],
|
||||
'duration': 494.811,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
||||
|
@ -186,6 +191,28 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'timestamp': 1320410746,
|
||||
'upload_date': '20111104',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||
'chapters': [],
|
||||
'duration': 186.867,
|
||||
},
|
||||
}, {
|
||||
# Has subtitles
|
||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||
'url': 'http://www.cbc.ca/player/play/2249992771553',
|
||||
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
|
||||
'info_dict': {
|
||||
'id': '2249992771553',
|
||||
'ext': 'mp4',
|
||||
'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake',
|
||||
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
|
||||
'timestamp': 1690596000,
|
||||
'duration': 2716.333,
|
||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
|
||||
'uploader': 'CBCC-NEW',
|
||||
'chapters': 'count:5',
|
||||
'upload_date': '20230729',
|
||||
},
|
||||
}]
|
||||
|
||||
|
@ -199,9 +226,42 @@ class CBCPlayerIE(InfoExtractor):
|
|||
'force_smil_url': True
|
||||
}),
|
||||
'id': video_id,
|
||||
'_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS
|
||||
}
|
||||
|
||||
|
||||
class CBCPlayerPlaylistIE(InfoExtractor):
    """Playlist extractor for cbc.ca/player category pages (anything but /player/play/)."""
    IE_NAME = 'cbc.ca:player:playlist'
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
    _TESTS = [
        {
            'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
            'playlist_mincount': 25,
            'info_dict': {
                'id': 'news/tv shows/the national/latest broadcast',
            }
        },
        {
            'url': 'https://www.cbc.ca/player/news/Canada/North',
            'playlist_mincount': 25,
            'info_dict': {
                'id': 'news/canada/north',
            }
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of CBCPlayerIE URLs for the clips listed under the category."""
        # The playlist id is the percent-decoded, lower-cased category path
        category = urllib.parse.unquote(self._match_id(url)).lower()
        page = self._download_webpage(url, category)
        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', page, 'initial state', category)

        # Category keys in the page state are matched case-insensitively
        clip_ids_path = (
            'video', 'clipsByCategory', lambda k, _: k.lower() == category, 'items', ..., 'id'
        )

        def iter_clip_results():
            for clip_id in traverse_obj(initial_state, clip_ids_path):
                yield self.url_result(f'https://www.cbc.ca/player/play/{clip_id}', CBCPlayerIE)

        return self.playlist_result(iter_clip_results(), category)
|
||||
|
||||
|
||||
class CBCGemIE(InfoExtractor):
|
||||
IE_NAME = 'gem.cbc.ca'
|
||||
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
||||
|
@ -280,12 +340,12 @@ class CBCGemIE(InfoExtractor):
|
|||
data = json.dumps({'jwt': sig}).encode()
|
||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
|
||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
|
||||
None, data=data, headers=headers)
|
||||
None, data=data, headers=headers, expected_status=426)
|
||||
cbc_access_token = resp['accessToken']
|
||||
|
||||
headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
|
||||
resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
|
||||
None, headers=headers)
|
||||
None, headers=headers, expected_status=426)
|
||||
return resp['claimsToken']
|
||||
|
||||
def _get_claims_token_expiry(self):
|
||||
|
@ -417,6 +477,10 @@ class CBCGemPlaylistIE(InfoExtractor):
|
|||
'id': 'schitts-creek/s06',
|
||||
'title': 'Season 6',
|
||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||
'series': 'Schitt\'s Creek',
|
||||
'season_number': 6,
|
||||
'season': 'Season 6',
|
||||
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||
|
|
|
@ -101,6 +101,7 @@ class CBSIE(CBSBaseIE):
|
|||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Subscription required',
|
||||
}, {
|
||||
'url': 'https://www.cbs.com/shows/video/sZH1MGgomIosZgxGJ1l263MFq16oMtW1/',
|
||||
'info_dict': {
|
||||
|
@ -117,6 +118,7 @@ class CBSIE(CBSBaseIE):
|
|||
},
|
||||
'expected_warnings': [
|
||||
'This content expired on', 'No video formats found', 'Requested format is not available'],
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
|
||||
'only_matching': True,
|
||||
|
|
|
@ -7,9 +7,9 @@ import zlib
|
|||
from .anvato import AnvatoIE
|
||||
from .common import InfoExtractor
|
||||
from .paramountplus import ParamountPlusIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
HEADRequest,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
|
|
|
@ -90,10 +90,17 @@ class CCCPlaylistIE(InfoExtractor):
|
|||
'id': '30c3',
|
||||
},
|
||||
'playlist_count': 135,
|
||||
}, {
|
||||
'url': 'https://media.ccc.de/c/DS2023',
|
||||
'info_dict': {
|
||||
'title': 'Datenspuren 2023',
|
||||
'id': 'DS2023',
|
||||
},
|
||||
'playlist_count': 37
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url).lower()
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
conf = self._download_json(
|
||||
'https://media.ccc.de/public/conferences/' + playlist_id,
|
||||
|
|
|
@ -1,20 +1,20 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
|
||||
from ..networking import Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
sanitized_Request,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
USER_AGENTS,
|
||||
)
|
||||
|
||||
USER_AGENTS = {
|
||||
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
|
||||
}
|
||||
|
||||
|
||||
class CeskaTelevizeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||
|
@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
|
||||
parsed_url = compat_urllib_parse_urlparse(urlh.url)
|
||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
|
@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||
entries = []
|
||||
|
||||
for user_agent in (None, USER_AGENTS['Safari']):
|
||||
req = sanitized_Request(
|
||||
req = Request(
|
||||
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
|
||||
data=urlencode_postdata(data))
|
||||
|
||||
req.add_header('Content-type', 'application/x-www-form-urlencoded')
|
||||
req.add_header('x-addr', '127.0.0.1')
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.headers['Content-type'] = 'application/x-www-form-urlencoded'
|
||||
req.headers['x-addr'] = '127.0.0.1'
|
||||
req.headers['X-Requested-With'] = 'XMLHttpRequest'
|
||||
if user_agent:
|
||||
req.add_header('User-Agent', user_agent)
|
||||
req.add_header('Referer', url)
|
||||
req.headers['User-Agent'] = user_agent
|
||||
req.headers['Referer'] = url
|
||||
|
||||
playlistpage = self._download_json(req, playlist_id, fatal=False)
|
||||
|
||||
|
@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
|||
if playlist_url == 'error_region':
|
||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
||||
|
||||
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.add_header('Referer', url)
|
||||
req = Request(compat_urllib_parse_unquote(playlist_url))
|
||||
req.headers['Referer'] = url
|
||||
|
||||
playlist = self._download_json(req, playlist_id, fatal=False)
|
||||
if not playlist:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
import urllib.error
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
|
@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor):
|
|||
'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500)
|
||||
if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
|
||||
or isinstance(e.cause, json.JSONDecodeError)):
|
||||
self.raise_login_required(method='cookies')
|
||||
raise
|
||||
|
|
136
yt_dlp/extractor/cineverse.py
Normal file
136
yt_dlp/extractor/cineverse.py
Normal file
|
@ -0,0 +1,136 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CineverseBaseIE(InfoExtractor):
    """Holds the URL prefix regex shared by the Cineverse-family extractors."""
    # Matches https://www.<host> for each listed host, captured as group "host"
    _VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(
        re.escape(hostname) for hostname in (
            'cineverse.com',
            'asiancrush.com',
            'dovechannel.com',
            'screambox.com',
            'midnightpulp.com',
            'fandor.com',
            'retrocrush.tv',
        ))
|
||||
|
||||
|
||||
class CineverseIE(CineverseBaseIE):
    """Extractor for /watch/ pages on the Cineverse-family sites."""
    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
    _TESTS = [
        {
            'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
            'skip': 'geo-blocked',
            'info_dict': {
                'title': 'Women Who Flirt',
                'ext': 'mp4',
                'id': 'DMR00018919',
                'modified_timestamp': 1678744575289,
                'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
                'duration': 5811.597,
                'description': 'md5:892fd62a05611d394141e8394ace0bc6',
                'age_limit': 13,
            }
        },
        {
            'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
            'skip': 'geo-blocked',
            'info_dict': {
                'title': 'Archenemy! Crystal Bowie',
                'ext': 'mp4',
                'id': '1000000023016',
                'episode_number': 3,
                'season_number': 1,
                'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
                'age_limit': 0,
                'episode': 'Episode 3',
                'season': 'Season 1',
                'duration': 1485.067,
                'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
                'series': 'Space Adventure COBRA (Original Japanese)',
            }
        },
    ]

    def _real_extract(self, url):
        """Extract one video from the page's Next.js data (``pageProps.idetails``)."""
        url, smuggled = unsmuggle_url(url, default={})
        # Another extractor may smuggle in the countries to use for geo bypass
        geo_countries = smuggled.get('geo_countries')
        self._initialize_geo_bypass({
            'countries': geo_countries,
        })

        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)
        idetails = self._search_nextjs_data(page, video_id)['props']['pageProps']['idetails']

        # err_code 1200 is treated as a geo block
        if idetails.get('err_code') == 1200:
            self.raise_geo_restricted(
                'This video is not available from your location due to geo restriction. '
                'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
                countries=geo_countries)

        # Both caption fields, when they hold valid URLs, are offered as English subtitles
        english_subs = traverse_obj(
            idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None

        return {
            'subtitles': filter_dict({
                'en': english_subs,
            }),
            'formats': self._extract_m3u8_formats(idetails['url'], video_id),
            **traverse_obj(idetails, {
                'title': 'title',
                'id': ('details', 'item_id'),
                'description': ('details', 'description'),
                'duration': ('duration', {lambda x: x / 1000}),
                'cast': ('details', 'cast', {lambda x: x.split(', ')}),
                'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
                'season_number': ('details', 'season', {int_or_none}),
                'episode_number': ('details', 'episode', {int_or_none}),
                'age_limit': ('details', 'rating_code', {parse_age_limit}),
                'series': ('details', 'series_details', 'title'),
            }),
        }
|
||||
|
||||
|
||||
class CineverseDetailsIE(CineverseBaseIE):
    # Matches /details/ pages, which describe either a series or a single title.
    _VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
        'playlist_mincount': 30,
        'info_dict': {
            'title': 'Space Adventure COBRA (Original Japanese)',
            'id': '1000000023012',
        }
    }, {
        'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
        'info_dict': {
            'id': 'NNVG4938',
            'ext': 'mp4',
            'title': 'Hansel and Gretel',
            'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
            'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
            'duration': 7030.732,
        },
    }]

    def _real_extract(self, url):
        """Turn a /details/ page into /watch/ URL results.

        Returns a playlist of episode entries when the page lists episodes with
        both an id and a title; otherwise a single entry for the page's own item.
        When the page reports the title as unavailable in the viewer's location,
        the listed countries are smuggled into every /watch/ URL.
        """
        match = self._match_valid_url(url)
        host, series_id = match.group('host', 'id')
        webpage = self._download_webpage(url, series_id)
        page_props = self._search_nextjs_data(webpage, series_id)['props']['pageProps']

        allowed_countries = traverse_obj(
            page_props, ('itemDetailsData', 'geo_country', {lambda countries: countries.split(', ')}))
        playback_error = traverse_obj(page_props, ('itemDetailsData', 'playback_err_msg'))
        is_geoblocked = playback_error == 'This title is not available in your location.'

        def watch_entry(item):
            # Build the /watch/ URL for one item and hand it over to CineverseIE
            watch_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
            target = smuggle_url(watch_url, {'geo_countries': allowed_countries}) if is_geoblocked else watch_url
            return self.url_result(target, CineverseIE)

        episodes = traverse_obj(
            page_props, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
        if not episodes:
            return watch_entry(page_props['itemDetailsData'])

        entries = [watch_entry(episode) for episode in episodes]
        series_title = traverse_obj(page_props, ('itemDetailsData', 'title'))
        return self.playlist_result(entries, playlist_id=series_id, playlist_title=series_title)
|
|
@ -33,7 +33,7 @@ class CiscoWebexIE(InfoExtractor):
|
|||
if rcid:
|
||||
webpage = self._download_webpage(url, None, note='Getting video ID')
|
||||
url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
|
||||
url = self._request_webpage(url, None, note='Resolving final URL').geturl()
|
||||
url = self._request_webpage(url, None, note='Resolving final URL').url
|
||||
mobj = self._match_valid_url(url)
|
||||
subdomain = mobj.group('subdomain')
|
||||
siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
|
||||
|
@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):
|
|||
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
|
||||
video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
|
||||
|
||||
if urlh.getcode() == 403:
|
||||
if urlh.status == 403:
|
||||
if stream['code'] == 53004:
|
||||
self.raise_login_required()
|
||||
if stream['code'] == 53005:
|
||||
|
@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):
|
|||
'This video is protected by a password, use the --video-password option', expected=True)
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
|
||||
|
||||
if urlh.getcode() == 429:
|
||||
if urlh.status == 429:
|
||||
self.raise_login_required(
|
||||
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
|
||||
method='cookies')
|
||||
|
|
|
@ -19,6 +19,7 @@ class CNBCIE(InfoExtractor):
|
|||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -49,6 +50,7 @@ class CNBCVideoIE(InfoExtractor):
|
|||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Dead link',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -17,15 +17,26 @@ import subprocess
|
|||
import sys
|
||||
import time
|
||||
import types
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_os_name,
|
||||
urllib_req_to_req,
|
||||
)
|
||||
from ..cookies import LenientSimpleCookie
|
||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||
from ..downloader.hls import HlsFD
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..utils import (
|
||||
IDENTITY,
|
||||
JSON_LD_RE,
|
||||
|
@ -34,7 +45,6 @@ from ..utils import (
|
|||
FormatSorter,
|
||||
GeoRestrictedError,
|
||||
GeoUtils,
|
||||
HEADRequest,
|
||||
LenientJSONDecoder,
|
||||
Popen,
|
||||
RegexNotFoundError,
|
||||
|
@ -60,7 +70,6 @@ from ..utils import (
|
|||
js_to_json,
|
||||
mimetype2ext,
|
||||
netrc_from_content,
|
||||
network_exceptions,
|
||||
orderedSet,
|
||||
parse_bitrate,
|
||||
parse_codecs,
|
||||
|
@ -70,7 +79,6 @@ from ..utils import (
|
|||
parse_resolution,
|
||||
sanitize_filename,
|
||||
sanitize_url,
|
||||
sanitized_Request,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
|
@ -82,8 +90,6 @@ from ..utils import (
|
|||
unescapeHTML,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
update_Request,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
|
@ -224,7 +230,8 @@ class InfoExtractor:
|
|||
width : height ratio as float.
|
||||
* no_resume The server does not support resuming the
|
||||
(HTTP or RTMP) download. Boolean.
|
||||
* has_drm The format has DRM and cannot be downloaded. Boolean
|
||||
* has_drm True if the format has DRM and cannot be downloaded.
|
||||
'maybe' if the format may have DRM and has to be tested before download.
|
||||
* extra_param_to_segment_url A query string to append to each
|
||||
fragment's URL, or to update each existing query string
|
||||
with. Only applied by the native HLS/DASH downloaders.
|
||||
|
@ -722,11 +729,11 @@ class InfoExtractor:
|
|||
except UnsupportedError:
|
||||
raise
|
||||
except ExtractorError as e:
|
||||
e.video_id = e.video_id or self.get_temp_id(url),
|
||||
e.video_id = e.video_id or self.get_temp_id(url)
|
||||
e.ie = e.ie or self.IE_NAME,
|
||||
e.traceback = e.traceback or sys.exc_info()[2]
|
||||
raise
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
|
||||
except (KeyError, StopIteration) as e:
|
||||
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
|
||||
|
@ -785,20 +792,25 @@ class InfoExtractor:
|
|||
|
||||
@staticmethod
|
||||
def __can_accept_status_code(err, expected_status):
|
||||
assert isinstance(err, urllib.error.HTTPError)
|
||||
assert isinstance(err, HTTPError)
|
||||
if expected_status is None:
|
||||
return False
|
||||
elif callable(expected_status):
|
||||
return expected_status(err.code) is True
|
||||
return expected_status(err.status) is True
|
||||
else:
|
||||
return err.code in variadic(expected_status)
|
||||
return err.status in variadic(expected_status)
|
||||
|
||||
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
||||
if isinstance(url_or_request, urllib.request.Request):
|
||||
return update_Request(url_or_request, data=data, headers=headers, query=query)
|
||||
if query:
|
||||
url_or_request = update_url_query(url_or_request, query)
|
||||
return sanitized_Request(url_or_request, data, headers or {})
|
||||
self._downloader.deprecation_warning(
|
||||
'Passing a urllib.request.Request to _create_request() is deprecated. '
|
||||
'Use yt_dlp.networking.common.Request instead.')
|
||||
url_or_request = urllib_req_to_req(url_or_request)
|
||||
elif not isinstance(url_or_request, Request):
|
||||
url_or_request = Request(url_or_request)
|
||||
|
||||
url_or_request.update(data=data, headers=headers, query=query)
|
||||
return url_or_request
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
||||
"""
|
||||
|
@ -834,14 +846,9 @@ class InfoExtractor:
|
|||
try:
|
||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
|
||||
except network_exceptions as err:
|
||||
if isinstance(err, urllib.error.HTTPError):
|
||||
if isinstance(err, HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
return err.response
|
||||
|
||||
if errnote is False:
|
||||
return False
|
||||
|
@ -973,11 +980,11 @@ class InfoExtractor:
|
|||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
if self.get_param('dump_intermediate_pages', False):
|
||||
self.to_screen('Dumping request to ' + urlh.geturl())
|
||||
self.to_screen('Dumping request to ' + urlh.url)
|
||||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self.get_param('write_pages'):
|
||||
filename = self._request_dump_filename(urlh.geturl(), video_id)
|
||||
filename = self._request_dump_filename(urlh.url, video_id)
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
|
@ -1035,7 +1042,7 @@ class InfoExtractor:
|
|||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.full_url, video_id)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
|
@ -1109,7 +1116,7 @@ class InfoExtractor:
|
|||
while True:
|
||||
try:
|
||||
return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
|
||||
except http.client.IncompleteRead as e:
|
||||
except IncompleteRead as e:
|
||||
try_count += 1
|
||||
if try_count >= tries:
|
||||
raise e
|
||||
|
@ -1680,7 +1687,7 @@ class InfoExtractor:
|
|||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||
rectx = re.escape(context_name)
|
||||
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
|
||||
FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){.*?\breturn\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
|
||||
js, arg_keys, arg_vals = self._search_regex(
|
||||
(rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
|
||||
webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
|
||||
|
@ -1806,7 +1813,7 @@ class InfoExtractor:
|
|||
return []
|
||||
|
||||
manifest, urlh = res
|
||||
manifest_url = urlh.geturl()
|
||||
manifest_url = urlh.url
|
||||
|
||||
return self._parse_f4m_formats(
|
||||
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
|
||||
|
@ -1965,7 +1972,7 @@ class InfoExtractor:
|
|||
return [], {}
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.geturl()
|
||||
m3u8_url = urlh.url
|
||||
|
||||
return self._parse_m3u8_formats_and_subtitles(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
|
@ -1979,11 +1986,7 @@ class InfoExtractor:
|
|||
errnote=None, fatal=True, data=None, headers={}, query={},
|
||||
video_id=None):
|
||||
formats, subtitles = [], {}
|
||||
|
||||
has_drm = re.search('|'.join([
|
||||
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
|
||||
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
|
||||
]), m3u8_doc)
|
||||
has_drm = HlsFD._has_drm(m3u8_doc)
|
||||
|
||||
def format_url(url):
|
||||
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
|
||||
|
@ -2245,18 +2248,10 @@ class InfoExtractor:
|
|||
if res is False:
|
||||
assert not fatal
|
||||
return [], {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
fmts = self._parse_smil_formats(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subs = self._parse_smil_subtitles(
|
||||
smil, namespace=namespace)
|
||||
|
||||
return fmts, subs
|
||||
return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params,
|
||||
namespace=self._parse_smil_namespace(smil))
|
||||
|
||||
def _extract_smil_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs)
|
||||
|
@ -2270,7 +2265,7 @@ class InfoExtractor:
|
|||
return {}
|
||||
|
||||
smil, urlh = res
|
||||
smil_url = urlh.geturl()
|
||||
smil_url = urlh.url
|
||||
|
||||
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
|
||||
|
||||
|
@ -2282,9 +2277,8 @@ class InfoExtractor:
|
|||
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
|
||||
namespace = self._parse_smil_namespace(smil)
|
||||
|
||||
formats = self._parse_smil_formats(
|
||||
formats, subtitles = self._parse_smil_formats_and_subtitles(
|
||||
smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
|
||||
subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
|
||||
video_id = os.path.splitext(url_basename(smil_url))[0]
|
||||
title = None
|
||||
|
@ -2323,7 +2317,14 @@ class InfoExtractor:
|
|||
return self._search_regex(
|
||||
r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
|
||||
|
||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
def _parse_smil_formats(self, *args, **kwargs):
    """Formats-only wrapper around _parse_smil_formats_and_subtitles().

    Takes the same arguments; subtitles found along the way are dropped after
    reporting that they are being ignored.
    """
    formats, subtitles = self._parse_smil_formats_and_subtitles(*args, **kwargs)
    if not subtitles:
        return formats
    self._report_ignoring_subs('SMIL')
    return formats
|
||||
|
||||
def _parse_smil_formats_and_subtitles(
|
||||
self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
||||
base = smil_url
|
||||
for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
|
||||
b = meta.get('base') or meta.get('httpBase')
|
||||
|
@ -2331,7 +2332,7 @@ class InfoExtractor:
|
|||
base = b
|
||||
break
|
||||
|
||||
formats = []
|
||||
formats, subtitles = [], {}
|
||||
rtmp_count = 0
|
||||
http_count = 0
|
||||
m3u8_count = 0
|
||||
|
@ -2379,8 +2380,9 @@ class InfoExtractor:
|
|||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
|
||||
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||
if len(m3u8_formats) == 1:
|
||||
m3u8_count += 1
|
||||
m3u8_formats[0].update({
|
||||
|
@ -2401,11 +2403,15 @@ class InfoExtractor:
|
|||
f4m_url += urllib.parse.urlencode(f4m_params)
|
||||
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||
elif src_ext == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False))
|
||||
mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(mpd_formats)
|
||||
self._merge_subtitles(mpd_subs, target=subtitles)
|
||||
elif re.search(r'\.ism/[Mm]anifest', src_url):
|
||||
formats.extend(self._extract_ism_formats(
|
||||
src_url, video_id, ism_id='mss', fatal=False))
|
||||
ism_formats, ism_subs = self._extract_ism_formats_and_subtitles(
|
||||
src_url, video_id, ism_id='mss', fatal=False)
|
||||
formats.extend(ism_formats)
|
||||
self._merge_subtitles(ism_subs, target=subtitles)
|
||||
elif src_url.startswith('http') and self._is_valid_url(src, video_id):
|
||||
http_count += 1
|
||||
formats.append({
|
||||
|
@ -2436,7 +2442,10 @@ class InfoExtractor:
|
|||
'format_note': 'SMIL storyboards',
|
||||
})
|
||||
|
||||
return formats
|
||||
smil_subs = self._parse_smil_subtitles(smil, namespace=namespace)
|
||||
self._merge_subtitles(smil_subs, target=subtitles)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
|
||||
urls = []
|
||||
|
@ -2462,7 +2471,7 @@ class InfoExtractor:
|
|||
return []
|
||||
|
||||
xspf, urlh = res
|
||||
xspf_url = urlh.geturl()
|
||||
xspf_url = urlh.url
|
||||
|
||||
return self._parse_xspf(
|
||||
xspf, playlist_id, xspf_url=xspf_url,
|
||||
|
@ -2533,7 +2542,7 @@ class InfoExtractor:
|
|||
return [], {}
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.geturl()
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
|
@ -2919,7 +2928,7 @@ class InfoExtractor:
|
|||
if ism_doc is None:
|
||||
return [], {}
|
||||
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
|
||||
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
|
||||
|
||||
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
|
||||
"""
|
||||
|
|
|
@ -41,7 +41,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
|
|||
)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
|
||||
'url': 'https://www.hgtv.ca/video/bryan-inc/movie-night-popcorn-with-bryan/870923331648/',
|
||||
'info_dict': {
|
||||
'id': '870923331648',
|
||||
'ext': 'mp4',
|
||||
|
@ -54,6 +54,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
|
|||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
# FIXME: yt-dlp wrongly raises for geo restriction
|
||||
}, {
|
||||
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
|
||||
'only_matching': True,
|
||||
|
|
|
@ -4,7 +4,7 @@ import re
|
|||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
|
@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor):
|
|||
errnote='Unable to download media JSON')
|
||||
except ExtractorError as e:
|
||||
# 401 means geo restriction, trying next country
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
continue
|
||||
raise
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import base64
|
||||
import urllib.error
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
|
@ -27,11 +27,24 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
_AUTH_HEADERS = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_QUERY = {}
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
'': 'en-US',
|
||||
'es': 'es-419',
|
||||
'es-es': 'es-ES',
|
||||
'fr': 'fr-FR',
|
||||
'it': 'it-IT',
|
||||
'pt-br': 'pt-BR',
|
||||
'pt-pt': 'pt-PT',
|
||||
'ru': 'ru-RU',
|
||||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return self._get_cookies(self._BASE_URL).get('etp_rt')
|
||||
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
|
@ -62,49 +75,49 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
if not self.is_logged_in:
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _update_query(self, lang):
|
||||
if lang in CrunchyrollBaseIE._QUERY:
|
||||
return
|
||||
|
||||
webpage = self._download_webpage(
|
||||
f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')
|
||||
|
||||
initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
|
||||
CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
|
||||
'locale': ('localization', 'locale'),
|
||||
}) or None
|
||||
|
||||
if CrunchyrollBaseIE._BASIC_AUTH:
|
||||
return
|
||||
|
||||
app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
|
||||
cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
|
||||
return
|
||||
|
||||
assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand'
|
||||
if not CrunchyrollBaseIE._BASIC_AUTH:
|
||||
cx_api_param = self._CLIENT_ID[self.is_logged_in]
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _locale_from_language(self, language):
    """Choose the locale for API queries.

    The first value of the ``metadata`` extractor argument takes precedence;
    without one, the ``_LOCALE_LOOKUP`` entry for ``language`` is used
    (``None`` when there is no such entry).
    """
    forced_locales = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
    if forced_locales:
        return forced_locales[0]
    return self._LOCALE_LOOKUP.get(language)
|
||||
|
||||
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
|
||||
self._update_query(lang)
|
||||
self._update_auth()
|
||||
|
||||
if not endpoint.startswith('/'):
|
||||
endpoint = f'/{endpoint}'
|
||||
|
||||
query = query.copy()
|
||||
locale = self._locale_from_language(lang)
|
||||
if locale:
|
||||
query['locale'] = locale
|
||||
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query})
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
|
||||
|
||||
def _call_api(self, path, internal_id, lang, note='api', query={}):
|
||||
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
|
||||
|
@ -114,7 +127,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
result = self._call_base_api(
|
||||
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
|
||||
return None
|
||||
raise
|
||||
|
||||
|
@ -206,7 +219,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
|||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:beta\.|www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
(?:(?P<lang>\w{2}(?:-\w{2})?)/)?
|
||||
watch/(?!concert|musicvideo)(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
# Premium only
|
||||
|
@ -304,7 +317,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
|||
},
|
||||
'playlist_mincount': 5,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y',
|
||||
'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
|
||||
|
@ -490,8 +503,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?crunchyroll\.com/
|
||||
(?P<lang>(?:\w{2}(?:-\w{2})?/)?)
|
||||
watch/(?P<type>concert|musicvideo)/(?P<id>\w{10})'''
|
||||
watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': 'MV5B02C79',
|
||||
'display_id': 'egaono-hana',
|
||||
'title': 'Egaono Hana',
|
||||
'track': 'Egaono Hana',
|
||||
'artist': 'Goose house',
|
||||
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
|
||||
'genre': ['J-Pop'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
|
@ -519,11 +545,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_ENDPOINT = 'music'
|
||||
|
||||
|
|
|
@ -1,10 +1,8 @@
|
|||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
HEADRequest,
|
||||
)
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CultureUnpluggedIE(InfoExtractor):
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import hashlib
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
classproperty,
|
||||
|
@ -105,7 +105,7 @@ class DacastVODIE(DacastBaseIE):
|
|||
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
|
||||
except ExtractorError as e:
|
||||
# CDN will randomly respond with 403
|
||||
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
retry.error = e
|
||||
continue
|
||||
raise
|
||||
|
|
|
@ -3,7 +3,7 @@ import json
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
|
@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
|
|||
None, 'Downloading Access Token',
|
||||
data=urlencode_postdata(data))['access_token']
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError(self._parse_json(
|
||||
e.cause.read().decode(), xid)['error_description'], expected=True)
|
||||
e.cause.response.read().decode(), xid)['error_description'], expected=True)
|
||||
raise
|
||||
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
|
||||
self._HEADERS['Authorization'] = 'Bearer ' + token
|
||||
|
|
|
@ -3,8 +3,8 @@ import string
|
|||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError
|
||||
from ..compat import compat_HTTPError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
|
||||
|
@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE):
|
|||
self._API_BASE_URL + 'streaming/video/' + video_id,
|
||||
display_id, 'Downloading streaming JSON metadata', headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
|
||||
e_description = self._parse_json(
|
||||
e.cause.read().decode(), display_id)['description']
|
||||
e.cause.response.read().decode(), display_id)['description']
|
||||
if 'resource not available for country' in e_description:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
if 'Authorized Networks' in e_description:
|
||||
|
|
|
@ -1,31 +1,72 @@
|
|||
import time
|
||||
import hashlib
|
||||
import re
|
||||
import urllib
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class DouyuTVIE(InfoExtractor):
|
||||
IE_DESC = '斗鱼'
|
||||
class DouyuBaseIE(InfoExtractor):
|
||||
def _download_cryptojs_md5(self, video_id):
|
||||
for url in [
|
||||
'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
|
||||
]:
|
||||
js_code = self._download_webpage(
|
||||
url, video_id, note='Downloading signing dependency', fatal=False)
|
||||
if js_code:
|
||||
self.cache.store('douyu', 'crypto-js-md5', js_code)
|
||||
return js_code
|
||||
raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
|
||||
|
||||
def _get_cryptojs_md5(self, video_id):
|
||||
return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id)
|
||||
|
||||
def _calc_sign(self, sign_func, video_id, a):
|
||||
b = uuid.uuid4().hex
|
||||
c = round(time.time())
|
||||
js_script = f'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
|
||||
phantom = PhantomJSwrapper(self)
|
||||
result = phantom.execute(js_script, video_id,
|
||||
note='Executing JS signing script').strip()
|
||||
return {i: v[0] for i, v in urllib.parse.parse_qs(result).items()}
|
||||
|
||||
def _search_js_sign_func(self, webpage, fatal=True):
|
||||
# The greedy look-behind ensures last possible script tag is matched
|
||||
return self._search_regex(
|
||||
r'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage, 'JS sign func', fatal=fatal)
|
||||
|
||||
|
||||
class DouyuTVIE(DouyuBaseIE):
|
||||
IE_DESC = '斗鱼直播'
|
||||
_VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.douyutv.com/iseven',
|
||||
'url': 'https://www.douyu.com/pigff',
|
||||
'info_dict': {
|
||||
'id': '17732',
|
||||
'display_id': 'iseven',
|
||||
'ext': 'flv',
|
||||
'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r're:.*m7show@163\.com.*',
|
||||
'thumbnail': r're:^https?://.*\.png',
|
||||
'uploader': '7师傅',
|
||||
'id': '24422',
|
||||
'display_id': 'pigff',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': r'≥15级牌子看鱼吧置顶帖进粉丝vx群',
|
||||
'thumbnail': str,
|
||||
'uploader': 'pigff',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
|
@ -85,15 +126,43 @@ class DouyuTVIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _get_sign_func(self, room_id, video_id):
|
||||
return self._download_json(
|
||||
f'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id,
|
||||
note='Getting signing script')['data'][f'room{room_id}']
|
||||
|
||||
def _extract_stream_formats(self, stream_formats):
|
||||
formats = []
|
||||
for stream_info in traverse_obj(stream_formats, (..., 'data')):
|
||||
stream_url = urljoin(
|
||||
traverse_obj(stream_info, 'rtmp_url'), traverse_obj(stream_info, 'rtmp_live'))
|
||||
if stream_url:
|
||||
rate_id = traverse_obj(stream_info, ('rate', {int_or_none}))
|
||||
rate_info = traverse_obj(stream_info, ('multirates', lambda _, v: v['rate'] == rate_id), get_all=False)
|
||||
ext = determine_ext(stream_url)
|
||||
formats.append({
|
||||
'url': stream_url,
|
||||
'format_id': str_or_none(rate_id),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
'quality': rate_id % -10000 if rate_id is not None else None,
|
||||
**traverse_obj(rate_info, {
|
||||
'format': ('name', {str_or_none}),
|
||||
'tbr': ('bit', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
if video_id.isdigit():
|
||||
room_id = video_id
|
||||
else:
|
||||
page = self._download_webpage(url, video_id)
|
||||
room_id = self._html_search_regex(
|
||||
r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
room_id = self._search_regex(r'\$ROOM\.room_id\s*=\s*(\d+)', webpage, 'room id')
|
||||
|
||||
if self._search_regex(r'"videoLoop"\s*:\s*(\d+)', webpage, 'loop', default='') == '1':
|
||||
raise UserNotLive('The channel is auto-playing VODs', video_id=video_id)
|
||||
if self._search_regex(r'\$ROOM\.show_status\s*=\s*(\d+)', webpage, 'status', default='') == '2':
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
# Grab metadata from API
|
||||
params = {
|
||||
|
@ -102,110 +171,136 @@ class DouyuTVIE(InfoExtractor):
|
|||
'time': int(time.time()),
|
||||
}
|
||||
params['auth'] = hashlib.md5(
|
||||
f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||
room = self._download_json(
|
||||
f'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
|
||||
room = traverse_obj(self._download_json(
|
||||
f'http://www.douyutv.com/api/v1/room/{room_id}', video_id,
|
||||
note='Downloading room info', query=params)['data']
|
||||
note='Downloading room info', query=params, fatal=False), 'data')
|
||||
|
||||
# 1 = live, 2 = offline
|
||||
if room.get('show_status') == '2':
|
||||
raise ExtractorError('Live stream is offline', expected=True)
|
||||
if traverse_obj(room, 'show_status') == '2':
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL'))
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id)
|
||||
js_sign_func = self._search_js_sign_func(webpage, fatal=False) or self._get_sign_func(room_id, video_id)
|
||||
form_data = {
|
||||
'rate': 0,
|
||||
**self._calc_sign(js_sign_func, video_id, room_id),
|
||||
}
|
||||
stream_formats = [self._download_json(
|
||||
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||
video_id, note="Downloading livestream format",
|
||||
data=urlencode_postdata(form_data))]
|
||||
|
||||
title = unescapeHTML(room['room_name'])
|
||||
description = room.get('show_details')
|
||||
thumbnail = room.get('room_src')
|
||||
uploader = room.get('nickname')
|
||||
for rate_id in traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')):
|
||||
if rate_id != traverse_obj(stream_formats[0], ('data', 'rate')):
|
||||
form_data['rate'] = rate_id
|
||||
stream_formats.append(self._download_json(
|
||||
f'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
|
||||
video_id, note=f'Downloading livestream format {rate_id}',
|
||||
data=urlencode_postdata(form_data)))
|
||||
|
||||
return {
|
||||
'id': room_id,
|
||||
'display_id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader,
|
||||
'formats': self._extract_stream_formats(stream_formats),
|
||||
'is_live': True,
|
||||
'subtitles': subs,
|
||||
'formats': formats,
|
||||
**traverse_obj(room, {
|
||||
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||
'title': ('room_name', {unescapeHTML}),
|
||||
'description': ('show_details', {str}),
|
||||
'uploader': ('nickname', {str}),
|
||||
'thumbnail': ('room_src', {url_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
class DouyuShowIE(InfoExtractor):
|
||||
class DouyuShowIE(DouyuBaseIE):
|
||||
_VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'md5': '0c2cfd068ee2afe657801269b2d86214',
|
||||
'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
|
||||
'info_dict': {
|
||||
'id': 'rjNBdvnVXNzvE2yw',
|
||||
'id': 'mPyq7oVNe5Yv1gLY',
|
||||
'ext': 'mp4',
|
||||
'title': '陈一发儿:砒霜 我有个室友系列!04-01 22点场',
|
||||
'duration': 7150.08,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': '陈一发儿',
|
||||
'uploader_id': 'XrZwYelr5wbK',
|
||||
'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
|
||||
'upload_date': '20170402',
|
||||
'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
|
||||
'duration': 633,
|
||||
'thumbnail': str,
|
||||
'uploader': '美食作家王刚V',
|
||||
'uploader_id': 'OVAO4NVx1m7Q',
|
||||
'timestamp': 1661850002,
|
||||
'upload_date': '20220830',
|
||||
'view_count': int,
|
||||
'tags': ['美食', '美食综合'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
'super': '原画',
|
||||
'high': '超清',
|
||||
'normal': '高清',
|
||||
}
|
||||
|
||||
_QUALITIES = {
|
||||
'super': -1,
|
||||
'high': -2,
|
||||
'normal': -3,
|
||||
}
|
||||
|
||||
_RESOLUTIONS = {
|
||||
'super': '1920x1080',
|
||||
'high': '1280x720',
|
||||
'normal': '852x480',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url = url.replace('vmobile.', 'v.')
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
room_info = self._parse_json(self._search_regex(
|
||||
r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
|
||||
video_info = self._search_json(
|
||||
r'<script>\s*window\.\$DATA\s*=', webpage,
|
||||
'video info', video_id, transform_source=js_to_json)
|
||||
|
||||
video_info = None
|
||||
js_sign_func = self._search_js_sign_func(webpage)
|
||||
form_data = {
|
||||
'vid': video_id,
|
||||
**self._calc_sign(js_sign_func, video_id, video_info['ROOM']['point_id']),
|
||||
}
|
||||
url_info = self._download_json(
|
||||
'https://v.douyu.com/api/stream/getStreamUrl', video_id,
|
||||
data=urlencode_postdata(form_data), note="Downloading video formats")
|
||||
|
||||
for trial in range(5):
|
||||
# Sometimes Douyu rejects our request. Let's try it more times
|
||||
try:
|
||||
video_info = self._download_json(
|
||||
'https://vmobile.douyu.com/video/getInfo', video_id,
|
||||
query={'vid': video_id},
|
||||
headers={
|
||||
'Referer': url,
|
||||
'x-requested-with': 'XMLHttpRequest',
|
||||
})
|
||||
break
|
||||
except ExtractorError:
|
||||
self._sleep(1, video_id)
|
||||
|
||||
if not video_info:
|
||||
raise ExtractorError('Can\'t fetch video info')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_info['data']['video_url'], video_id,
|
||||
entry_protocol='m3u8_native', ext='mp4')
|
||||
|
||||
upload_date = unified_strdate(self._html_search_regex(
|
||||
r'<em>上传时间:</em><span>([^<]+)</span>', webpage,
|
||||
'upload date', fatal=False))
|
||||
|
||||
uploader = uploader_id = uploader_url = None
|
||||
mobj = re.search(
|
||||
r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
|
||||
webpage)
|
||||
if mobj:
|
||||
uploader_id, uploader = mobj.groups()
|
||||
uploader_url = urljoin(url, '/author/' + uploader_id)
|
||||
formats = []
|
||||
for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)):
|
||||
video_url = traverse_obj(url, ('url', {url_or_none}))
|
||||
if video_url:
|
||||
ext = determine_ext(video_url)
|
||||
formats.append({
|
||||
'format': self._FORMATS.get(name),
|
||||
'format_id': name,
|
||||
'url': video_url,
|
||||
'quality': self._QUALITIES.get(name),
|
||||
'ext': 'mp4' if ext == 'm3u8' else ext,
|
||||
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
|
||||
**parse_resolution(self._RESOLUTIONS.get(name))
|
||||
})
|
||||
else:
|
||||
self.to_screen(
|
||||
f'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': room_info['name'],
|
||||
'formats': formats,
|
||||
'duration': room_info.get('duration'),
|
||||
'thumbnail': room_info.get('pic'),
|
||||
'upload_date': upload_date,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'uploader_url': uploader_url,
|
||||
**traverse_obj(video_info, ('DATA', {
|
||||
'title': ('content', 'title', {str}),
|
||||
'uploader': ('content', 'author', {str}),
|
||||
'uploader_id': ('content', 'up_id', {str_or_none}),
|
||||
'duration': ('content', 'video_duration', {int_or_none}),
|
||||
'thumbnail': ('content', 'video_pic', {url_or_none}),
|
||||
'timestamp': ('content', 'create_time', {int_or_none}),
|
||||
'view_count': ('content', 'view_num', {int_or_none}),
|
||||
'tags': ('videoTag', ..., 'tagName', {str}),
|
||||
}))
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue