mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-17 21:59:17 +00:00
Merge branch 'master' into master
This commit is contained in:
commit
5b9a30f258
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
1
.github/PULL_REQUEST_TEMPLATE.md
vendored
|
@ -28,7 +28,6 @@ # PLEASE FOLLOW THE GUIDE BELOW
|
|||
### Before submitting a *pull request* make sure you have:
|
||||
- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions)
|
||||
- [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
|
||||
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions)
|
||||
|
||||
### In order to be accepted and merged into yt-dlp, each piece of code must be in the public domain or released under the [Unlicense](http://unlicense.org/). Check all of the following options that apply:
|
||||
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
|
||||
|
|
42
.github/workflows/build.yml
vendored
42
.github/workflows/build.yml
vendored
|
@ -237,7 +237,7 @@ jobs:
|
|||
macos:
|
||||
needs: process
|
||||
if: inputs.macos
|
||||
runs-on: macos-11
|
||||
runs-on: macos-12
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
@ -260,11 +260,23 @@ jobs:
|
|||
--pre -d curl_cffi_whls \
|
||||
-r requirements.txt
|
||||
done
|
||||
( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite
|
||||
# See https://github.com/yt-dlp/yt-dlp/pull/10069
|
||||
cd curl_cffi_whls
|
||||
mkdir -p curl_cffi/.dylibs
|
||||
python_libdir=$(python3 -c 'import sys; from pathlib import Path; print(Path(sys.path[1]).parent)')
|
||||
for dylib in lib{ssl,crypto}.3.dylib; do
|
||||
cp "${python_libdir}/${dylib}" "curl_cffi/.dylibs/${dylib}"
|
||||
for wheel in curl_cffi*macos*x86_64.whl; do
|
||||
zip "${wheel}" "curl_cffi/.dylibs/${dylib}"
|
||||
done
|
||||
done
|
||||
)
|
||||
python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2
|
||||
python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2
|
||||
cd curl_cffi_universal2
|
||||
for wheel in *cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done
|
||||
python3 -m pip install -U --user *cffi*.whl
|
||||
for wheel in ./*cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done
|
||||
python3 -m pip install -U --user ./*cffi*.whl
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -311,7 +323,7 @@ jobs:
|
|||
# Hack to get the latest patch version. Uncomment if needed
|
||||
#brew install python@3.10
|
||||
#export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 )
|
||||
curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg"
|
||||
curl "https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg" -o "python.pkg"
|
||||
sudo installer -pkg python.pkg -target /
|
||||
python3 --version
|
||||
- name: Install Requirements
|
||||
|
@ -360,8 +372,8 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include py2exe --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
python devscripts/install_deps.py --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.7.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -369,12 +381,20 @@ jobs:
|
|||
python devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python -m bundle.py2exe
|
||||
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
|
||||
python -m bundle.pyinstaller
|
||||
python -m bundle.pyinstaller --onedir
|
||||
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe
|
||||
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
|
||||
|
||||
- name: Install Requirements (py2exe)
|
||||
run: |
|
||||
python devscripts/install_deps.py --include py2exe
|
||||
- name: Build (py2exe)
|
||||
run: |
|
||||
python -m bundle.py2exe
|
||||
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
|
||||
Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
run: |
|
||||
|
@ -413,7 +433,7 @@ jobs:
|
|||
run: |
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -467,8 +487,8 @@ jobs:
|
|||
run: |
|
||||
cd ./artifact/
|
||||
# make sure SHA sums are also printed to stdout
|
||||
sha256sum * | tee ../SHA2-256SUMS
|
||||
sha512sum * | tee ../SHA2-512SUMS
|
||||
sha256sum -- * | tee ../SHA2-256SUMS
|
||||
sha512sum -- * | tee ../SHA2-512SUMS
|
||||
|
||||
- name: Make Update spec
|
||||
run: |
|
||||
|
|
2
.github/workflows/core.yml
vendored
2
.github/workflows/core.yml
vendored
|
@ -53,7 +53,7 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
|
||||
run: python3 ./devscripts/install_deps.py --include test --include curl-cffi
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
run: |
|
||||
|
|
16
.github/workflows/quick-test.yml
vendored
16
.github/workflows/quick-test.yml
vendored
|
@ -15,13 +15,13 @@ jobs:
|
|||
with:
|
||||
python-version: '3.8'
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
run: python3 ./devscripts/install_deps.py --include test
|
||||
- name: Run tests
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true
|
||||
python3 ./devscripts/run_tests.py core
|
||||
flake8:
|
||||
name: Linter
|
||||
check:
|
||||
name: Code check
|
||||
if: "!contains(github.event.head_commit.message, 'ci skip all')"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
@ -29,9 +29,11 @@ jobs:
|
|||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: Install flake8
|
||||
run: python3 ./devscripts/install_deps.py -o --include dev
|
||||
- name: Install dev dependencies
|
||||
run: python3 ./devscripts/install_deps.py -o --include static-analysis
|
||||
- name: Make lazy extractors
|
||||
run: python3 ./devscripts/make_lazy_extractors.py
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
||||
- name: Run ruff
|
||||
run: ruff check --output-format github .
|
||||
- name: Run autopep8
|
||||
run: autopep8 --diff .
|
||||
|
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -67,7 +67,7 @@ cookies
|
|||
# Python
|
||||
*.pyc
|
||||
*.pyo
|
||||
.pytest_cache
|
||||
.*_cache
|
||||
wine-py2exe/
|
||||
py2exe.log
|
||||
build/
|
||||
|
|
14
.pre-commit-config.yaml
Normal file
14
.pre-commit-config.yaml
Normal file
|
@ -0,0 +1,14 @@
|
|||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: linter
|
||||
name: Apply linter fixes
|
||||
entry: ruff check --fix .
|
||||
language: system
|
||||
types: [python]
|
||||
require_serial: true
|
||||
- id: format
|
||||
name: Apply formatting fixes
|
||||
entry: autopep8 --in-place .
|
||||
language: system
|
||||
types: [python]
|
9
.pre-commit-hatch.yaml
Normal file
9
.pre-commit-hatch.yaml
Normal file
|
@ -0,0 +1,9 @@
|
|||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: fix
|
||||
name: Apply code fixes
|
||||
entry: hatch fmt
|
||||
language: system
|
||||
types: [python]
|
||||
require_serial: true
|
|
@ -134,18 +134,53 @@ ### Is the website primarily used for piracy?
|
|||
|
||||
# DEVELOPER INSTRUCTIONS
|
||||
|
||||
Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation).
|
||||
Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases), get them via [the other installation methods](README.md#installation) or directly run it using `python -m yt_dlp`.
|
||||
|
||||
To run yt-dlp as a developer, you don't need to build anything either. Simply execute
|
||||
`yt-dlp` uses [`hatch`](<https://hatch.pypa.io>) as a project management tool.
|
||||
You can easily install it using [`pipx`](<https://pipx.pypa.io>) via `pipx install hatch`, or else via `pip` or your package manager of choice. Make sure you are using at least version `1.10.0`, otherwise some functionality might not work as expected.
|
||||
|
||||
python3 -m yt_dlp
|
||||
If you plan on contributing to `yt-dlp`, best practice is to start by running the following command:
|
||||
|
||||
To run all the available core tests, use:
|
||||
```shell
|
||||
$ hatch run setup
|
||||
```
|
||||
|
||||
python3 devscripts/run_tests.py
|
||||
The above command will install a `pre-commit` hook so that required checks/fixes (linting, formatting) will run automatically before each commit. If any code needs to be linted or formatted, then the commit will be blocked and the necessary changes will be made; you should review all edits and re-commit the fixed version.
|
||||
|
||||
After this you can use `hatch shell` to enable a virtual environment that has `yt-dlp` and its development dependencies installed.
|
||||
|
||||
In addition, the following script commands can be used to run simple tasks such as linting or testing (without having to run `hatch shell` first):
|
||||
* `hatch fmt`: Automatically fix linter violations and apply required code formatting changes
|
||||
* See `hatch fmt --help` for more info
|
||||
* `hatch test`: Run extractor or core tests
|
||||
* See `hatch test --help` for more info
|
||||
|
||||
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
|
||||
|
||||
While it is strongly recommended to use `hatch` for yt-dlp development, if you are unable to do so, you can instead manually create a virtual environment and use the following commands:
|
||||
|
||||
```shell
|
||||
# To only install development dependencies:
|
||||
$ python -m devscripts.install_deps --include dev
|
||||
|
||||
# Or, for an editable install plus dev dependencies:
|
||||
$ python -m pip install -e ".[default,dev]"
|
||||
|
||||
# To set up the pre-commit hook:
|
||||
$ pre-commit install
|
||||
|
||||
# To be used in place of `hatch test`:
|
||||
$ python -m devscripts.run_tests
|
||||
|
||||
# To be used in place of `hatch fmt`:
|
||||
$ ruff check --fix .
|
||||
$ autopep8 --in-place .
|
||||
|
||||
# To only check code instead of applying fixes:
|
||||
$ ruff check .
|
||||
$ autopep8 --diff .
|
||||
```
|
||||
|
||||
If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).
|
||||
|
||||
|
||||
|
@ -165,12 +200,16 @@ ## Adding support for a new site
|
|||
1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
|
||||
1. Check out the source code with:
|
||||
|
||||
git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
|
||||
```shell
|
||||
$ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git
|
||||
```
|
||||
|
||||
1. Start a new git branch with:
|
||||
|
||||
cd yt-dlp
|
||||
git checkout -b yourextractor
|
||||
```shell
|
||||
$ cd yt-dlp
|
||||
$ git checkout -b yourextractor
|
||||
```
|
||||
|
||||
1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:
|
||||
|
||||
|
@ -217,21 +256,27 @@ ## Adding support for a new site
|
|||
# TODO more properties (see yt_dlp/extractor/common.py)
|
||||
}
|
||||
```
|
||||
1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
|
||||
1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`
|
||||
1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. Also note that when adding a parenthesized import group, the last import in the group must have a trailing comma in order for this formatting to be respected by our code formatter.
|
||||
1. Run `hatch test YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`.
|
||||
1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
|
||||
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
|
||||
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
|
||||
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions), passes [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started) code checks and is properly formatted:
|
||||
|
||||
$ flake8 yt_dlp/extractor/yourextractor.py
|
||||
```shell
|
||||
$ hatch fmt --check
|
||||
```
|
||||
|
||||
You can use `hatch fmt` to automatically fix problems.
|
||||
|
||||
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python.
|
||||
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
|
||||
|
||||
$ git add yt_dlp/extractor/_extractors.py
|
||||
$ git add yt_dlp/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add extractor'
|
||||
$ git push origin yourextractor
|
||||
```shell
|
||||
$ git add yt_dlp/extractor/_extractors.py
|
||||
$ git add yt_dlp/extractor/yourextractor.py
|
||||
$ git commit -m '[yourextractor] Add extractor'
|
||||
$ git push origin yourextractor
|
||||
```
|
||||
|
||||
1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
|
||||
|
||||
|
|
21
CONTRIBUTORS
21
CONTRIBUTORS
|
@ -610,3 +610,24 @@ Offert4324
|
|||
sta1us
|
||||
Tomoka1
|
||||
trwstin
|
||||
alexhuot1
|
||||
clienthax
|
||||
DaPotato69
|
||||
emqi
|
||||
hugohaa
|
||||
imanoreotwe
|
||||
JakeFinley96
|
||||
lostfictions
|
||||
minamotorin
|
||||
ocococococ
|
||||
Podiumnoche
|
||||
RasmusAntons
|
||||
roeniss
|
||||
shoxie007
|
||||
Szpachlarz
|
||||
The-MAGI
|
||||
TuxCoder
|
||||
voidful
|
||||
vtexier
|
||||
WyohKnott
|
||||
trueauracoral
|
||||
|
|
121
Changelog.md
121
Changelog.md
|
@ -4,6 +4,127 @@ # Changelog
|
|||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2024.05.27
|
||||
|
||||
#### Extractor changes
|
||||
- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev)
|
||||
- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral)
|
||||
- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev)
|
||||
- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2024.05.26
|
||||
|
||||
#### Core changes
|
||||
- [Better warning when requested subs format not found](https://github.com/yt-dlp/yt-dlp/commit/7e4259dff0b681a3f0e8a930799ce0394328c86e) ([#9873](https://github.com/yt-dlp/yt-dlp/issues/9873)) by [DaPotato69](https://github.com/DaPotato69)
|
||||
- [Merged with youtube-dl a08f2b7](https://github.com/yt-dlp/yt-dlp/commit/a4da9db87b6486b270c15dfa07ab5bfedc83f6bd) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||
- [Warn if lack of ffmpeg alters format selection](https://github.com/yt-dlp/yt-dlp/commit/96da9525043f78aca4544d01761b13b2140e9ae6) ([#9805](https://github.com/yt-dlp/yt-dlp/issues/9805)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
|
||||
- **cookies**
|
||||
- [Add `--cookies-from-browser` support for Whale](https://github.com/yt-dlp/yt-dlp/commit/dd9ad97b1fbdd36c086b8ba82328a4d954f78f8e) ([#9649](https://github.com/yt-dlp/yt-dlp/issues/9649)) by [roeniss](https://github.com/roeniss)
|
||||
- [Get chrome session cookies with `--cookies-from-browser`](https://github.com/yt-dlp/yt-dlp/commit/f1f158976e38d38a260762accafe7bbe6d451151) ([#9747](https://github.com/yt-dlp/yt-dlp/issues/9747)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
|
||||
- **windows**: [Improve shell quoting and tests](https://github.com/yt-dlp/yt-dlp/commit/64766459e37451b665c1464073c28361fbcf1c25) ([#9802](https://github.com/yt-dlp/yt-dlp/issues/9802)) by [Grub4K](https://github.com/Grub4K) (With fixes in [7e26bd5](https://github.com/yt-dlp/yt-dlp/commit/7e26bd53f9c5893518fde81dfd0079ec08dd841e))
|
||||
|
||||
#### Extractor changes
|
||||
- [Add POST data hash to `--write-pages` filenames](https://github.com/yt-dlp/yt-dlp/commit/61b17437dc14a1c7e90ff48a6198df77828c6df4) ([#9879](https://github.com/yt-dlp/yt-dlp/issues/9879)) by [minamotorin](https://github.com/minamotorin) (With fixes in [c999bac](https://github.com/yt-dlp/yt-dlp/commit/c999bac02c5a4f755b2a82488a975e91c988ffd8) by [bashonly](https://github.com/bashonly))
|
||||
- [Make `_search_nextjs_data` non fatal](https://github.com/yt-dlp/yt-dlp/commit/3ee1194288981c4f2c4abd8315326de0c424d2ce) ([#8937](https://github.com/yt-dlp/yt-dlp/issues/8937)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **afreecatv**: live: [Add `cdn` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/315b3544296bb83012e20ee3af9d3cbf5600dd1c) ([#9666](https://github.com/yt-dlp/yt-dlp/issues/9666)) by [bashonly](https://github.com/bashonly)
|
||||
- **alura**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc2879ecb05aaad36869609d154e4321362c1f63) ([#9658](https://github.com/yt-dlp/yt-dlp/issues/9658)) by [hugohaa](https://github.com/hugohaa)
|
||||
- **artetv**: [Label forced subtitles](https://github.com/yt-dlp/yt-dlp/commit/7b5674949fd03a33b47b67b31d56a5adf1c48c91) ([#9945](https://github.com/yt-dlp/yt-dlp/issues/9945)) by [vtexier](https://github.com/vtexier)
|
||||
- **bbc**: [Fix and extend extraction](https://github.com/yt-dlp/yt-dlp/commit/7975ddf245d22af034d5b983eeb1c5ec6c2ce053) ([#9705](https://github.com/yt-dlp/yt-dlp/issues/9705)) by [dirkf](https://github.com/dirkf), [kylegustavo](https://github.com/kylegustavo), [pukkandan](https://github.com/pukkandan)
|
||||
- **bilibili**: [Fix `--geo-verification-proxy` support](https://github.com/yt-dlp/yt-dlp/commit/2338827072dacab0f15348b70aec8685feefc8d1) ([#9817](https://github.com/yt-dlp/yt-dlp/issues/9817)) by [fireattack](https://github.com/fireattack)
|
||||
- **bilibilispacevideo**
|
||||
- [Better error message](https://github.com/yt-dlp/yt-dlp/commit/06d52c87314e0bbc16c43c405090843885577b88) ([#9839](https://github.com/yt-dlp/yt-dlp/issues/9839)) by [fireattack](https://github.com/fireattack)
|
||||
- [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4cc99d7b6cce8b39506ead01407445d576b63ee4) ([#9905](https://github.com/yt-dlp/yt-dlp/issues/9905)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **boosty**: [Add cookies support](https://github.com/yt-dlp/yt-dlp/commit/145dc6f6563e80d2da1b3e9aea2ffa795b71622c) ([#9522](https://github.com/yt-dlp/yt-dlp/issues/9522)) by [RasmusAntons](https://github.com/RasmusAntons)
|
||||
- **brilliantpala**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/eead3bbc01f6529862bdad1f0b2adeabda4f006e) ([#9788](https://github.com/yt-dlp/yt-dlp/issues/9788)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **canalalpha**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/00a9f2e1f7fa69499221f2e8dd73a08efeef79bc) ([#9675](https://github.com/yt-dlp/yt-dlp/issues/9675)) by [kclauhk](https://github.com/kclauhk)
|
||||
- **cbc.ca**: player: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8bf48f3a8fa29587e7c73ef5a7710385a5ea725) ([#9866](https://github.com/yt-dlp/yt-dlp/issues/9866)) by [carusocr](https://github.com/carusocr)
|
||||
- **cda**: [Fix age-gated web extraction](https://github.com/yt-dlp/yt-dlp/commit/6d8a53d870ff6795f509085bfbf3981417999038) ([#9939](https://github.com/yt-dlp/yt-dlp/issues/9939)) by [dirkf](https://github.com/dirkf), [emqi](https://github.com/emqi), [Podiumnoche](https://github.com/Podiumnoche), [Szpachlarz](https://github.com/Szpachlarz)
|
||||
- **commonmistakes**: [Raise error on blob URLs](https://github.com/yt-dlp/yt-dlp/commit/98d71d8c5e5dab08b561ee6f137e968d2a004262) ([#9897](https://github.com/yt-dlp/yt-dlp/issues/9897)) by [seproDev](https://github.com/seproDev)
|
||||
- **crunchyroll**
|
||||
- [Always make metadata available](https://github.com/yt-dlp/yt-dlp/commit/cb2fb4a643949322adba561ca73bcba3221ec0c5) ([#9772](https://github.com/yt-dlp/yt-dlp/issues/9772)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix auth and remove cookies support](https://github.com/yt-dlp/yt-dlp/commit/ff38a011d57b763f3a69bebd25a5dc9044a717ce) ([#9749](https://github.com/yt-dlp/yt-dlp/issues/9749)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix stream extraction](https://github.com/yt-dlp/yt-dlp/commit/f2816634e3be88fe158b342ee33918de3c272a54) ([#10005](https://github.com/yt-dlp/yt-dlp/issues/10005)) by [bashonly](https://github.com/bashonly)
|
||||
- [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/5904853ae5788509fdc4892cb7ecdfa9ae7f78e6) ([#9857](https://github.com/yt-dlp/yt-dlp/issues/9857)) by [bashonly](https://github.com/bashonly)
|
||||
- **dangalplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0d067e77c3f5527946fb0c22ee1c7011994cba40) ([#10021](https://github.com/yt-dlp/yt-dlp/issues/10021)) by [bashonly](https://github.com/bashonly)
|
||||
- **discoveryplus**: [Fix dmax.de and related extractors](https://github.com/yt-dlp/yt-dlp/commit/90d2da311bbb5dc06f385ee428c7e4590936e995) ([#10020](https://github.com/yt-dlp/yt-dlp/issues/10020)) by [bashonly](https://github.com/bashonly)
|
||||
- **eplus**: [Handle URLs without videos](https://github.com/yt-dlp/yt-dlp/commit/351dc0bc334c4e1b5f00c152818c3ec0ed71f788) ([#9855](https://github.com/yt-dlp/yt-dlp/issues/9855)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **europarlwebstream**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/800a43983e5fb719526ce4cb3956216085c63268) ([#9647](https://github.com/yt-dlp/yt-dlp/issues/9647)) by [seproDev](https://github.com/seproDev), [voidful](https://github.com/voidful)
|
||||
- **facebook**: [Fix DASH formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e3b42d8b1b8bcfff7ba146c19fc3f6f6ba843cea) ([#9734](https://github.com/yt-dlp/yt-dlp/issues/9734)) by [bashonly](https://github.com/bashonly)
|
||||
- **godresource**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65e709d23530959075816e966c42179ad46e8e3b) ([#9629](https://github.com/yt-dlp/yt-dlp/issues/9629)) by [HobbyistDev](https://github.com/HobbyistDev)
|
||||
- **googledrive**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/85ec2a337ac325cf6427cbafd56f0a034c1a5218) ([#9908](https://github.com/yt-dlp/yt-dlp/issues/9908)) by [WyohKnott](https://github.com/WyohKnott)
|
||||
- **hearthisat**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5bbfdb7c999b22f1aeca0c3489c167d6eb73013b) ([#9949](https://github.com/yt-dlp/yt-dlp/issues/9949)) by [bohwaz](https://github.com/bohwaz), [seproDev](https://github.com/seproDev)
|
||||
- **hytale**: [Use `CloudflareStreamIE` explicitly](https://github.com/yt-dlp/yt-dlp/commit/31b417e1d1ccc67d5c027bf8878f483dc34cb118) ([#9672](https://github.com/yt-dlp/yt-dlp/issues/9672)) by [llamasblade](https://github.com/llamasblade)
|
||||
- **instagram**: [Support `/reels/` URLs](https://github.com/yt-dlp/yt-dlp/commit/06cb0638392b607b47d3c2ac48eb2ebecb0f060d) ([#9539](https://github.com/yt-dlp/yt-dlp/issues/9539)) by [amir16yp](https://github.com/amir16yp)
|
||||
- **jiocinema**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1463945ae5fb05986a0bd1aa02e41d1a08d93a02) ([#10026](https://github.com/yt-dlp/yt-dlp/issues/10026)) by [bashonly](https://github.com/bashonly)
|
||||
- **jiosaavn**: [Extract via API and fix playlists](https://github.com/yt-dlp/yt-dlp/commit/0c21c53885cf03f4040467ae8c44d7ff51016116) ([#9656](https://github.com/yt-dlp/yt-dlp/issues/9656)) by [bashonly](https://github.com/bashonly)
|
||||
- **lci**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a2eebc76770fca91ffabeff658d560f716fec80) ([#10025](https://github.com/yt-dlp/yt-dlp/issues/10025)) by [ocococococ](https://github.com/ocococococ)
|
||||
- **mixch**: [Extract comments](https://github.com/yt-dlp/yt-dlp/commit/b38018b781b062d5169d104ab430489aef8e7f1e) ([#9860](https://github.com/yt-dlp/yt-dlp/issues/9860)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **moviepilot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/296df0da1d38a44d34c99b60a18066c301774537) ([#9366](https://github.com/yt-dlp/yt-dlp/issues/9366)) by [panatexxa](https://github.com/panatexxa)
|
||||
- **netease**: program: [Improve `--no-playlist` message](https://github.com/yt-dlp/yt-dlp/commit/73f12119b52d98281804b0c072b2ed6aa841ec88) ([#9488](https://github.com/yt-dlp/yt-dlp/issues/9488)) by [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **nfb**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0a1a8e3005f66c44bf67633dccd4df19c3fccd1a) ([#9650](https://github.com/yt-dlp/yt-dlp/issues/9650)) by [rrgomes](https://github.com/rrgomes)
|
||||
- **ntslive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be7db1a5a8c483726c511c30ea4689cbb8b27962) ([#9641](https://github.com/yt-dlp/yt-dlp/issues/9641)) by [lostfictions](https://github.com/lostfictions)
|
||||
- **orf**: on: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9) ([#9677](https://github.com/yt-dlp/yt-dlp/issues/9677)) by [TuxCoder](https://github.com/TuxCoder)
|
||||
- **orftvthek**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/3779f2a307ba3ef1d28e107cdd71b221dfb4eb36) ([#10011](https://github.com/yt-dlp/yt-dlp/issues/10011)) by [seproDev](https://github.com/seproDev)
|
||||
- **patreon**
|
||||
- [Extract multiple embeds](https://github.com/yt-dlp/yt-dlp/commit/036e0d92c6052465673d459678322ea03e61483d) ([#9850](https://github.com/yt-dlp/yt-dlp/issues/9850)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix Vimeo embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c9ce57d9bf51541da2381d99bc096a9d0ddf1f27) ([#9712](https://github.com/yt-dlp/yt-dlp/issues/9712)) by [bashonly](https://github.com/bashonly)
|
||||
- **piapro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3ba8de62d61d782256f5c1e9939a0762039657de) ([#9311](https://github.com/yt-dlp/yt-dlp/issues/9311)) by [FinnRG](https://github.com/FinnRG), [seproDev](https://github.com/seproDev)
|
||||
- **pornhub**: [Fix login by email address](https://github.com/yt-dlp/yt-dlp/commit/518c1afc1592cae3e4eb39dc646b5bc059333112) ([#9914](https://github.com/yt-dlp/yt-dlp/issues/9914)) by [feederbox826](https://github.com/feederbox826)
|
||||
- **qub**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b54cccdcb892bca3e55993480d8b86f1c7e6da6) ([#7019](https://github.com/yt-dlp/yt-dlp/issues/7019)) by [alexhuot1](https://github.com/alexhuot1), [dirkf](https://github.com/dirkf)
|
||||
- **reddit**: [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/82f4f4444e26daf35b7302c406fe2312f78f619e) ([#10006](https://github.com/yt-dlp/yt-dlp/issues/10006)) by [kclauhk](https://github.com/kclauhk)
|
||||
- **soundcloud**
|
||||
- [Add `formats` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/beaf832c7a9d57833f365ce18f6115b88071b296) ([#10004](https://github.com/yt-dlp/yt-dlp/issues/10004)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||
- [Extract `genres`](https://github.com/yt-dlp/yt-dlp/commit/231c2eacc41b06b65c63edf94c0d04768a5da607) ([#9821](https://github.com/yt-dlp/yt-dlp/issues/9821)) by [bashonly](https://github.com/bashonly)
|
||||
- **taptap**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/63b569bc5e7d461753637a20ad84a575adee4c0a) ([#9776](https://github.com/yt-dlp/yt-dlp/issues/9776)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **tele5**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d) ([#10024](https://github.com/yt-dlp/yt-dlp/issues/10024)) by [bashonly](https://github.com/bashonly)
|
||||
- **theatercomplextown**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/8056a3026ed6ec6a6d0ed56fdd7ebcd16e928341) ([#9754](https://github.com/yt-dlp/yt-dlp/issues/9754)) by [bashonly](https://github.com/bashonly)
|
||||
- **tiktok**
|
||||
- [Add `device_id` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/3584b8390bd21c0393a3079eeee71aed56a1c1d8) ([#9951](https://github.com/yt-dlp/yt-dlp/issues/9951)) by [bashonly](https://github.com/bashonly)
|
||||
- [Extract all web formats](https://github.com/yt-dlp/yt-dlp/commit/4ccd73fea0f6f4be343e1ec7f22dd03799addcf8) ([#9960](https://github.com/yt-dlp/yt-dlp/issues/9960)) by [bashonly](https://github.com/bashonly)
|
||||
- [Extract via mobile API only if extractor-arg is passed](https://github.com/yt-dlp/yt-dlp/commit/41ba4a808b597a3afed78c89675a30deb6844450) ([#9938](https://github.com/yt-dlp/yt-dlp/issues/9938)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/eef1e9f44ff14c5e65b759bb1eafa3946cdaf719) ([#9961](https://github.com/yt-dlp/yt-dlp/issues/9961)) by [bashonly](https://github.com/bashonly)
|
||||
- collection: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/119d41f27061d220d276a2d38cfc8d873437452a) ([#9986](https://github.com/yt-dlp/yt-dlp/issues/9986)) by [bashonly](https://github.com/bashonly), [imanoreotwe](https://github.com/imanoreotwe)
|
||||
- user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/347f13dd9bccc2b4db3ea25689410d45d8370ed4) ([#9661](https://github.com/yt-dlp/yt-dlp/issues/9661)) by [bashonly](https://github.com/bashonly)
|
||||
- **tv5monde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6db96268c521e945d42649607db1574f5d92e082) ([#9143](https://github.com/yt-dlp/yt-dlp/issues/9143)) by [alard](https://github.com/alard), [seproDev](https://github.com/seproDev)
|
||||
- **twitter**
|
||||
- [Fix auth for x.com migration](https://github.com/yt-dlp/yt-dlp/commit/3e35aa32c74bc108375be8c8b6b3bfc90dfff1b4) ([#9952](https://github.com/yt-dlp/yt-dlp/issues/9952)) by [bashonly](https://github.com/bashonly)
|
||||
- [Support x.com URLs](https://github.com/yt-dlp/yt-dlp/commit/4813173e4544f125d6f2afc31e600727d761b8dd) ([#9926](https://github.com/yt-dlp/yt-dlp/issues/9926)) by [bashonly](https://github.com/bashonly)
|
||||
- **vk**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/df5c9e733aaba703cf285c0372b6d61629330c82) ([#9885](https://github.com/yt-dlp/yt-dlp/issues/9885)) by [seproDev](https://github.com/seproDev)
|
||||
- **wrestleuniverse**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/c4853655cb9a793129280806af643de43c48f4d5) ([#9800](https://github.com/yt-dlp/yt-dlp/issues/9800)) by [bashonly](https://github.com/bashonly)
|
||||
- **xiaohongshu**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2e9031605d87c469be9ce98dbbdf4960b727338) ([#9646](https://github.com/yt-dlp/yt-dlp/issues/9646)) by [HobbyistDev](https://github.com/HobbyistDev)
|
||||
- **xvideos**: quickies: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b207d26f83fb8ab0ce56df74dff43ff583a3264f) ([#9834](https://github.com/yt-dlp/yt-dlp/issues/9834)) by [JakeFinley96](https://github.com/JakeFinley96)
|
||||
- **youporn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/351368cb9a6731b886a58f5a10fd6b302bbe47be) ([#8827](https://github.com/yt-dlp/yt-dlp/issues/8827)) by [The-MAGI](https://github.com/The-MAGI)
|
||||
- **youtube**
|
||||
- [Add `mediaconnect` client](https://github.com/yt-dlp/yt-dlp/commit/cf212d0a331aba05c32117573f760cdf3af8c62f) ([#9546](https://github.com/yt-dlp/yt-dlp/issues/9546)) by [clienthax](https://github.com/clienthax)
|
||||
- [Extract upload timestamp if available](https://github.com/yt-dlp/yt-dlp/commit/96a134dea6397a5f2131947c427aac52c8b4e677) ([#9856](https://github.com/yt-dlp/yt-dlp/issues/9856)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/8e15177b4113c355989881e4e030f695a9b59c3a) ([#9775](https://github.com/yt-dlp/yt-dlp/issues/9775)) by [bbilly1](https://github.com/bbilly1), [jakeogh](https://github.com/jakeogh), [minamotorin](https://github.com/minamotorin), [shoxie007](https://github.com/shoxie007)
|
||||
- [Remove `android` from default clients](https://github.com/yt-dlp/yt-dlp/commit/12d8ea8246fa901de302ff5cc748caddadc82f41) ([#9553](https://github.com/yt-dlp/yt-dlp/issues/9553)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
|
||||
- **zenyandex**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c4b87dd885ee5391e5f481e7c8bd550a7c543623) ([#9813](https://github.com/yt-dlp/yt-dlp/issues/9813)) by [src-tinkerer](https://github.com/src-tinkerer)
|
||||
|
||||
#### Networking changes
|
||||
- [Add `extensions` attribute to `Response`](https://github.com/yt-dlp/yt-dlp/commit/bec9a59e8ec82c18e3bf9268eaa436793dd52e35) ([#9756](https://github.com/yt-dlp/yt-dlp/issues/9756)) by [bashonly](https://github.com/bashonly)
|
||||
- **Request Handler**
|
||||
- requests
|
||||
- [Patch support for `requests` 2.32.2+](https://github.com/yt-dlp/yt-dlp/commit/3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0) ([#9992](https://github.com/yt-dlp/yt-dlp/issues/9992)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [Update to `requests` 2.32.0](https://github.com/yt-dlp/yt-dlp/commit/c36513f1be2ef3d3cec864accbffda1afaa06ffd) ([#9980](https://github.com/yt-dlp/yt-dlp/issues/9980)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
#### Misc. changes
|
||||
- [Add `hatch`, `ruff`, `pre-commit` and improve dev docs](https://github.com/yt-dlp/yt-dlp/commit/e897bd8292a41999cf51dba91b390db5643c72db) ([#7409](https://github.com/yt-dlp/yt-dlp/issues/7409)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
|
||||
- **build**
|
||||
- [Migrate `linux_exe` to static musl builds](https://github.com/yt-dlp/yt-dlp/commit/ac817bc83efd939dca3e40c4b527d0ccfc77172b) ([#9811](https://github.com/yt-dlp/yt-dlp/issues/9811)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||
- [Normalize `curl_cffi` group to `curl-cffi`](https://github.com/yt-dlp/yt-dlp/commit/02483bea1c4dbe1bace8ca4d19700104fbb8a00f) ([#9698](https://github.com/yt-dlp/yt-dlp/issues/9698)) by [bashonly](https://github.com/bashonly) (With fixes in [89f535e](https://github.com/yt-dlp/yt-dlp/commit/89f535e2656964b4061c25a7739d4d6ba0a30568))
|
||||
- [Run `macos_legacy` job on `macos-12`](https://github.com/yt-dlp/yt-dlp/commit/1a366403d9c26b992faa77e00f4d02ead57559e3) ([#9804](https://github.com/yt-dlp/yt-dlp/issues/9804)) by [bashonly](https://github.com/bashonly)
|
||||
- [`macos` job requires `setuptools<70`](https://github.com/yt-dlp/yt-dlp/commit/78c57cc0e0998b8ed90e4306f410aa4be4115cd7) ([#9993](https://github.com/yt-dlp/yt-dlp/issues/9993)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**
|
||||
- [Remove questionable extractors](https://github.com/yt-dlp/yt-dlp/commit/01395a34345d1c6ba1b73ca92f94dd200dc45341) ([#9911](https://github.com/yt-dlp/yt-dlp/issues/9911)) by [seproDev](https://github.com/seproDev)
|
||||
- Miscellaneous: [5c019f6](https://github.com/yt-dlp/yt-dlp/commit/5c019f6328ad40d66561eac3c4de0b3cd070d0f6), [ae2af11](https://github.com/yt-dlp/yt-dlp/commit/ae2af1104f80caf2f47544763a33db2c17a3e1de) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
|
||||
- **test**
|
||||
- [Add HTTP proxy tests](https://github.com/yt-dlp/yt-dlp/commit/3c7a287e281d9f9a353dce8902ff78a84c24a040) ([#9578](https://github.com/yt-dlp/yt-dlp/issues/9578)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Fix connect timeout test](https://github.com/yt-dlp/yt-dlp/commit/53b4d44f55cca66ac33dab092ef2a30b1164b684) ([#9906](https://github.com/yt-dlp/yt-dlp/issues/9906)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
### 2024.04.09
|
||||
|
||||
#### Important changes
|
||||
|
|
11
Makefile
11
Makefile
|
@ -27,7 +27,7 @@ clean-dist:
|
|||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||
clean-cache:
|
||||
find . \( \
|
||||
-type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
|
||||
-type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
|
||||
\) -prune -exec rm -rf {} \;
|
||||
|
||||
completion-bash: completions/bash/yt-dlp
|
||||
|
@ -70,14 +70,15 @@ uninstall:
|
|||
rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish
|
||||
|
||||
codetest:
|
||||
flake8 .
|
||||
ruff check .
|
||||
autopep8 --diff .
|
||||
|
||||
test:
|
||||
$(PYTHON) -m pytest
|
||||
$(PYTHON) -m pytest -Werror
|
||||
$(MAKE) codetest
|
||||
|
||||
offlinetest: codetest
|
||||
$(PYTHON) -m pytest -k "not download"
|
||||
$(PYTHON) -m pytest -Werror -m "not download"
|
||||
|
||||
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
|
||||
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
|
||||
|
@ -151,7 +152,7 @@ yt-dlp.tar.gz: all
|
|||
--exclude '*.pyo' \
|
||||
--exclude '*~' \
|
||||
--exclude '__pycache__' \
|
||||
--exclude '.pytest_cache' \
|
||||
--exclude '.*_cache' \
|
||||
--exclude '.git' \
|
||||
-- \
|
||||
README.md supportedsites.md Changelog.md LICENSE \
|
||||
|
|
49
README.md
49
README.md
|
@ -108,7 +108,6 @@ #### Alternatives
|
|||
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary
|
||||
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
|
||||
[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
|
||||
[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update)
|
||||
[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
|
||||
[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
|
||||
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update)
|
||||
|
@ -170,7 +169,7 @@ # To update to nightly from stable executable/binary:
|
|||
yt-dlp --update-to nightly
|
||||
|
||||
# To install nightly with pip:
|
||||
python3 -m pip install -U --pre yt-dlp[default]
|
||||
python3 -m pip install -U --pre "yt-dlp[default]"
|
||||
```
|
||||
|
||||
## DEPENDENCIES
|
||||
|
@ -202,7 +201,7 @@ #### Impersonation
|
|||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||
|
||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
|
||||
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
|
||||
|
||||
|
||||
|
@ -263,7 +262,7 @@ ### Platform-independent Binary (UNIX)
|
|||
|
||||
### Standalone Py2Exe Builds (Windows)
|
||||
|
||||
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run.
|
||||
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
|
||||
|
||||
If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
|
||||
|
||||
|
@ -402,6 +401,9 @@ ## Network Options:
|
|||
--impersonate CLIENT[:OS] Client to impersonate for requests. E.g.
|
||||
chrome, chrome-110, chrome:windows-10. Pass
|
||||
--impersonate="" to impersonate any client.
|
||||
Note that forcing impersonation for all
|
||||
requests may have a detrimental impact on
|
||||
download speed and stability
|
||||
--list-impersonate-targets List available clients to impersonate.
|
||||
-4, --force-ipv4 Make all connections via IPv4
|
||||
-6, --force-ipv6 Make all connections via IPv6
|
||||
|
@ -666,16 +668,17 @@ ## Filesystem Options:
|
|||
The name of the browser to load cookies
|
||||
from. Currently supported browsers are:
|
||||
brave, chrome, chromium, edge, firefox,
|
||||
opera, safari, vivaldi. Optionally, the
|
||||
KEYRING used for decrypting Chromium cookies
|
||||
on Linux, the name/path of the PROFILE to
|
||||
load cookies from, and the CONTAINER name
|
||||
(if Firefox) ("none" for no container) can
|
||||
be given with their respective separators.
|
||||
By default, all containers of the most
|
||||
recently accessed profile are used.
|
||||
Currently supported keyrings are: basictext,
|
||||
gnomekeyring, kwallet, kwallet5, kwallet6
|
||||
opera, safari, vivaldi, whale. Optionally,
|
||||
the KEYRING used for decrypting Chromium
|
||||
cookies on Linux, the name/path of the
|
||||
PROFILE to load cookies from, and the
|
||||
CONTAINER name (if Firefox) ("none" for no
|
||||
container) can be given with their
|
||||
respective separators. By default, all
|
||||
containers of the most recently accessed
|
||||
profile are used. Currently supported
|
||||
keyrings are: basictext, gnomekeyring,
|
||||
kwallet, kwallet5, kwallet6
|
||||
--no-cookies-from-browser Do not load cookies from browser (default)
|
||||
--cache-dir DIR Location in the filesystem where yt-dlp can
|
||||
store some downloaded information (such as
|
||||
|
@ -1751,7 +1754,7 @@ # Replace all spaces and "_" in title and uploader with a `-`
|
|||
|
||||
# EXTRACTOR ARGUMENTS
|
||||
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"`
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
|
||||
|
||||
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
|
||||
|
||||
|
@ -1760,7 +1763,7 @@ # EXTRACTOR ARGUMENTS
|
|||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
|
@ -1813,8 +1816,9 @@ #### tiktok
|
|||
* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
|
||||
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
|
||||
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
|
||||
* `aid`: Default app ID to use with API calls, e.g. `1180`
|
||||
* `app_info`: One or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||
* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
|
||||
* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||
* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls. Default is a random 19-digit string
|
||||
|
||||
#### rokfinchannel
|
||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
@ -1834,12 +1838,18 @@ #### nhkradirulive (NHK らじる★らじる LIVE)
|
|||
#### nflplusreplay
|
||||
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
|
||||
|
||||
#### jiocinema
|
||||
* `refresh_token`: The `refreshToken` UUID from browser local storage can be passed to extend the life of your login session when logging in with `token` as username and the `accessToken` from browser local storage as password
|
||||
|
||||
#### jiosaavn
|
||||
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
|
||||
|
||||
#### afreecatvlive
|
||||
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
|
||||
|
||||
#### soundcloud
|
||||
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||
|
@ -2116,7 +2126,7 @@ # CHANGES FROM YOUTUBE-DL
|
|||
|
||||
### New features
|
||||
|
||||
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
|
||||
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@a08f2b7**](https://github.com/ytdl-org/youtube-dl/commit/a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
|
||||
|
||||
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
|
||||
|
||||
|
@ -2326,6 +2336,7 @@ #### No longer supported
|
|||
--write-annotations No supported site has annotations now
|
||||
--no-write-annotations Default
|
||||
--compat-options seperate-video-versions No longer needed
|
||||
--compat-options no-youtube-prefer-utc-upload-date No longer supported
|
||||
|
||||
#### Removed
|
||||
These options were deprecated since 2014 and have now been entirely removed
|
||||
|
|
|
@ -147,5 +147,27 @@
|
|||
"action": "add",
|
||||
"when": "9590cc6b4768e190183d7d071a6c78170889116a",
|
||||
"short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly."
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "41ba4a808b597a3afed78c89675a30deb6844450",
|
||||
"short": "[ie/tiktok] Extract via mobile API only if extractor-arg is passed (#9938)",
|
||||
"authors": ["bashonly"]
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "6e36d17f404556f0e3a43f441c477a71a91877d9"
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "beaf832c7a9d57833f365ce18f6115b88071b296",
|
||||
"short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)",
|
||||
"authors": ["bashonly", "Grub4K"]
|
||||
},
|
||||
{
|
||||
"action": "change",
|
||||
"when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6",
|
||||
"short": "[cleanup] Misc (#9765)",
|
||||
"authors": ["bashonly", "Grub4K", "seproDev"]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -42,17 +42,25 @@ def parse_args():
|
|||
def main():
|
||||
args = parse_args()
|
||||
project_table = parse_toml(read_file(args.input))['project']
|
||||
recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]')
|
||||
optional_groups = project_table['optional-dependencies']
|
||||
excludes = args.exclude or []
|
||||
|
||||
def yield_deps(group):
|
||||
for dep in group:
|
||||
if mobj := recursive_pattern.fullmatch(dep):
|
||||
yield from optional_groups.get(mobj.group('group_name'), [])
|
||||
else:
|
||||
yield dep
|
||||
|
||||
targets = []
|
||||
if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
|
||||
targets.extend(project_table['dependencies'])
|
||||
if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
|
||||
targets.extend(optional_groups['default'])
|
||||
targets.extend(yield_deps(optional_groups['default']))
|
||||
|
||||
for include in filter(None, map(optional_groups.get, args.include or [])):
|
||||
targets.extend(include)
|
||||
targets.extend(yield_deps(include))
|
||||
|
||||
targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]
|
||||
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
@echo off
|
||||
|
||||
>&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead
|
||||
python %~dp0run_tests.py %~1
|
|
@ -4,6 +4,7 @@
|
|||
import functools
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
@ -18,6 +19,8 @@ def parse_args():
|
|||
'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
|
||||
parser.add_argument(
|
||||
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
|
||||
parser.add_argument(
|
||||
'--pytest-args', help='arguments to passthrough to pytest')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
@ -26,15 +29,16 @@ def run_tests(*tests, pattern=None, ci=False):
|
|||
run_download = 'download' in tests
|
||||
tests = list(map(fix_test_name, tests))
|
||||
|
||||
arguments = ['pytest', '-Werror', '--tb=short']
|
||||
pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
|
||||
arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
|
||||
if ci:
|
||||
arguments.append('--color=yes')
|
||||
if pattern:
|
||||
arguments.extend(['-k', pattern])
|
||||
if run_core:
|
||||
arguments.extend(['-m', 'not download'])
|
||||
elif run_download:
|
||||
arguments.extend(['-m', 'download'])
|
||||
elif pattern:
|
||||
arguments.extend(['-k', pattern])
|
||||
else:
|
||||
arguments.extend(
|
||||
f'test/test_download.py::TestDownload::test_{test}' for test in tests)
|
||||
|
@ -46,13 +50,13 @@ def run_tests(*tests, pattern=None, ci=False):
|
|||
pass
|
||||
|
||||
arguments = [sys.executable, '-Werror', '-m', 'unittest']
|
||||
if pattern:
|
||||
arguments.extend(['-k', pattern])
|
||||
if run_core:
|
||||
print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True)
|
||||
return 1
|
||||
elif run_download:
|
||||
arguments.append('test.test_download')
|
||||
elif pattern:
|
||||
arguments.extend(['-k', pattern])
|
||||
else:
|
||||
arguments.extend(
|
||||
f'test.test_download.TestDownload.test_{test}' for test in tests)
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
>&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead'
|
||||
python3 devscripts/run_tests.py "$1"
|
17
pyinst.py
17
pyinst.py
|
@ -1,17 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import warnings
|
||||
|
||||
from bundle.pyinstaller import main
|
||||
|
||||
warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. '
|
||||
'Use `bundle.pyinstaller` instead'))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
165
pyproject.toml
165
pyproject.toml
|
@ -62,18 +62,28 @@ build = [
|
|||
"build",
|
||||
"hatchling",
|
||||
"pip",
|
||||
"setuptools",
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
"flake8",
|
||||
"isort",
|
||||
"pytest",
|
||||
"pre-commit",
|
||||
"yt-dlp[static-analysis]",
|
||||
"yt-dlp[test]",
|
||||
]
|
||||
static-analysis = [
|
||||
"autopep8~=2.0",
|
||||
"ruff~=0.4.4",
|
||||
]
|
||||
test = [
|
||||
"pytest~=8.1",
|
||||
]
|
||||
pyinstaller = [
|
||||
"pyinstaller>=6.3; sys_platform!='darwin'",
|
||||
"pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi
|
||||
"pyinstaller>=6.7.0", # for compat with setuptools>=70
|
||||
]
|
||||
py2exe = [
|
||||
"py2exe>=0.12",
|
||||
"requests==2.31.*",
|
||||
]
|
||||
py2exe = ["py2exe>=0.12"]
|
||||
|
||||
[project.urls]
|
||||
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
|
||||
|
@ -122,3 +132,146 @@ artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
|
|||
[tool.hatch.version]
|
||||
path = "yt_dlp/version.py"
|
||||
pattern = "_pkg_version = '(?P<version>[^']+)'"
|
||||
|
||||
[tool.hatch.envs.default]
|
||||
features = ["curl-cffi", "default"]
|
||||
dependencies = ["pre-commit"]
|
||||
path = ".venv"
|
||||
installer = "uv"
|
||||
|
||||
[tool.hatch.envs.default.scripts]
|
||||
setup = "pre-commit install --config .pre-commit-hatch.yaml"
|
||||
yt-dlp = "python -Werror -Xdev -m yt_dlp {args}"
|
||||
|
||||
[tool.hatch.envs.hatch-static-analysis]
|
||||
detached = true
|
||||
features = ["static-analysis"]
|
||||
dependencies = [] # override hatch ruff version
|
||||
config-path = "pyproject.toml"
|
||||
|
||||
[tool.hatch.envs.hatch-static-analysis.scripts]
|
||||
format-check = "autopep8 --diff {args:.}"
|
||||
format-fix = "autopep8 --in-place {args:.}"
|
||||
lint-check = "ruff check {args:.}"
|
||||
lint-fix = "ruff check --fix {args:.}"
|
||||
|
||||
[tool.hatch.envs.hatch-test]
|
||||
features = ["test"]
|
||||
dependencies = [
|
||||
"pytest-randomly~=3.15",
|
||||
"pytest-rerunfailures~=14.0",
|
||||
"pytest-xdist[psutil]~=3.5",
|
||||
]
|
||||
|
||||
[tool.hatch.envs.hatch-test.scripts]
|
||||
run = "python -m devscripts.run_tests {args}"
|
||||
run-cov = "echo Code coverage not implemented && exit 1"
|
||||
|
||||
[[tool.hatch.envs.hatch-test.matrix]]
|
||||
python = [
|
||||
"3.8",
|
||||
"3.9",
|
||||
"3.10",
|
||||
"3.11",
|
||||
"3.12",
|
||||
"pypy3.8",
|
||||
"pypy3.9",
|
||||
"pypy3.10",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
|
||||
[tool.ruff.lint]
|
||||
ignore = [
|
||||
"E402", # module level import not at top of file
|
||||
"E501", # line too long
|
||||
"E731", # do not assign a lambda expression, use a def
|
||||
"E741", # ambiguous variable name
|
||||
]
|
||||
select = [
|
||||
"E", # pycodestyle errors
|
||||
"W", # pycodestyle warnings
|
||||
"F", # pyflakes
|
||||
"I", # import order
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"devscripts/lazy_load_template.py" = ["F401"]
|
||||
"!yt_dlp/extractor/**.py" = ["I"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
known-first-party = [
|
||||
"bundle",
|
||||
"devscripts",
|
||||
"test",
|
||||
]
|
||||
relative-imports-order = "closest-to-furthest"
|
||||
|
||||
[tool.autopep8]
|
||||
max_line_length = 120
|
||||
recursive = true
|
||||
exit-code = true
|
||||
jobs = 0
|
||||
select = [
|
||||
"E101",
|
||||
"E112",
|
||||
"E113",
|
||||
"E115",
|
||||
"E116",
|
||||
"E117",
|
||||
"E121",
|
||||
"E122",
|
||||
"E123",
|
||||
"E124",
|
||||
"E125",
|
||||
"E126",
|
||||
"E127",
|
||||
"E128",
|
||||
"E129",
|
||||
"E131",
|
||||
"E201",
|
||||
"E202",
|
||||
"E203",
|
||||
"E211",
|
||||
"E221",
|
||||
"E222",
|
||||
"E223",
|
||||
"E224",
|
||||
"E225",
|
||||
"E226",
|
||||
"E227",
|
||||
"E228",
|
||||
"E231",
|
||||
"E241",
|
||||
"E242",
|
||||
"E251",
|
||||
"E252",
|
||||
"E261",
|
||||
"E262",
|
||||
"E265",
|
||||
"E266",
|
||||
"E271",
|
||||
"E272",
|
||||
"E273",
|
||||
"E274",
|
||||
"E275",
|
||||
"E301",
|
||||
"E302",
|
||||
"E303",
|
||||
"E304",
|
||||
"E305",
|
||||
"E306",
|
||||
"E502",
|
||||
"E701",
|
||||
"E702",
|
||||
"E704",
|
||||
"W391",
|
||||
"W504",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "-ra -v --strict-markers"
|
||||
markers = [
|
||||
"download",
|
||||
]
|
||||
|
|
|
@ -14,12 +14,6 @@ remove-duplicate-keys = true
|
|||
remove-unused-variables = true
|
||||
|
||||
|
||||
[tool:pytest]
|
||||
addopts = -ra -v --strict-markers
|
||||
markers =
|
||||
download
|
||||
|
||||
|
||||
[tox:tox]
|
||||
skipsdist = true
|
||||
envlist = py{38,39,310,311,312},pypy{38,39,310}
|
||||
|
|
36
setup.py
36
setup.py
|
@ -1,36 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import warnings
|
||||
|
||||
|
||||
if sys.argv[1:2] == ['py2exe']:
|
||||
warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. '
|
||||
'Use `bundle.py2exe` instead'))
|
||||
|
||||
import bundle.py2exe
|
||||
|
||||
bundle.py2exe.main()
|
||||
|
||||
elif 'build_lazy_extractors' in sys.argv:
|
||||
warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. '
|
||||
'Use `devscripts.make_lazy_extractors` instead'))
|
||||
|
||||
import subprocess
|
||||
|
||||
os.chdir(sys.path[0])
|
||||
print('running build_lazy_extractors')
|
||||
subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
|
||||
|
||||
else:
|
||||
|
||||
print(
|
||||
'ERROR: Building by calling `setup.py` is deprecated. '
|
||||
'Use a build frontend like `build` instead. ',
|
||||
'Refer to https://build.pypa.io for more info', file=sys.stderr)
|
||||
sys.exit(1)
|
|
@ -14,7 +14,6 @@ # Supported sites
|
|||
- **6play**
|
||||
- **7plus**
|
||||
- **8tracks**
|
||||
- **91porn**
|
||||
- **9c9media**
|
||||
- **9gag**: 9GAG
|
||||
- **9News**
|
||||
|
@ -220,7 +219,7 @@ # Supported sites
|
|||
- **BusinessInsider**
|
||||
- **BuzzFeed**
|
||||
- **BYUtv**: (**Currently broken**)
|
||||
- **CableAV**
|
||||
- **CaffeineTV**
|
||||
- **Callin**
|
||||
- **Caltrans**
|
||||
- **CAM4**
|
||||
|
@ -333,6 +332,8 @@ # Supported sites
|
|||
- **DailyWirePodcast**
|
||||
- **damtomo:record**
|
||||
- **damtomo:video**
|
||||
- **dangalplay**: [*dangalplay*](## "netrc machine")
|
||||
- **dangalplay:season**: [*dangalplay*](## "netrc machine")
|
||||
- **daum.net**
|
||||
- **daum.net:clip**
|
||||
- **daum.net:playlist**
|
||||
|
@ -396,7 +397,6 @@ # Supported sites
|
|||
- **EinsUndEinsTV**: [*1und1tv*](## "netrc machine")
|
||||
- **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine")
|
||||
- **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine")
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **ElementorEmbed**
|
||||
- **Elonet**
|
||||
|
@ -498,6 +498,7 @@ # Supported sites
|
|||
- **GameStar**
|
||||
- **Gaskrank**
|
||||
- **Gazeta**: (**Currently broken**)
|
||||
- **GBNews**: GB News clips, features and live streams
|
||||
- **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**)
|
||||
- **GediDigital**
|
||||
- **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
|
||||
|
@ -527,6 +528,7 @@ # Supported sites
|
|||
- **GMANetworkVideo**
|
||||
- **Go**
|
||||
- **GoDiscovery**
|
||||
- **GodResource**
|
||||
- **GodTube**: (**Currently broken**)
|
||||
- **Gofile**
|
||||
- **Golem**
|
||||
|
@ -630,11 +632,11 @@ # Supported sites
|
|||
- **iwara:user**: [*iwara*](## "netrc machine")
|
||||
- **Ixigua**
|
||||
- **Izlesene**
|
||||
- **Jable**
|
||||
- **JablePlaylist**
|
||||
- **Jamendo**
|
||||
- **JamendoAlbum**
|
||||
- **JeuxVideo**: (**Currently broken**)
|
||||
- **jiocinema**: [*jiocinema*](## "netrc machine")
|
||||
- **jiocinema:series**: [*jiocinema*](## "netrc machine")
|
||||
- **jiosaavn:album**
|
||||
- **jiosaavn:playlist**
|
||||
- **jiosaavn:song**
|
||||
|
@ -974,6 +976,7 @@ # Supported sites
|
|||
- **NRKTVSeason**
|
||||
- **NRKTVSeries**
|
||||
- **NRLTV**: (**Currently broken**)
|
||||
- **nts.live**
|
||||
- **ntv.ru**
|
||||
- **NubilesPorn**: [*nubiles-porn*](## "netrc machine")
|
||||
- **nuum:live**
|
||||
|
@ -1015,7 +1018,6 @@ # Supported sites
|
|||
- **orf:on**
|
||||
- **orf:podcast**
|
||||
- **orf:radio**
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**: [*osnateltv*](## "netrc machine")
|
||||
- **OsnatelTVLive**: [*osnateltv*](## "netrc machine")
|
||||
- **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine")
|
||||
|
@ -1394,6 +1396,10 @@ # Supported sites
|
|||
- **SztvHu**
|
||||
- **t-online.de**: (**Currently broken**)
|
||||
- **Tagesschau**: (**Currently broken**)
|
||||
- **TapTapApp**
|
||||
- **TapTapAppIntl**
|
||||
- **TapTapMoment**
|
||||
- **TapTapPostIntl**
|
||||
- **Tass**: (**Currently broken**)
|
||||
- **TBS**
|
||||
- **TBSJPEpisode**
|
||||
|
@ -1412,7 +1418,7 @@ # Supported sites
|
|||
- **TedSeries**
|
||||
- **TedTalk**
|
||||
- **Tele13**
|
||||
- **Tele5**: (**Currently broken**)
|
||||
- **Tele5**
|
||||
- **TeleBruxelles**
|
||||
- **TelecaribePlay**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
|
@ -1452,11 +1458,12 @@ # Supported sites
|
|||
- **ThreeSpeak**
|
||||
- **ThreeSpeakUser**
|
||||
- **TikTok**
|
||||
- **tiktok:collection**
|
||||
- **tiktok:effect**: (**Currently broken**)
|
||||
- **tiktok:live**
|
||||
- **tiktok:sound**: (**Currently broken**)
|
||||
- **tiktok:tag**: (**Currently broken**)
|
||||
- **tiktok:user**: (**Currently broken**)
|
||||
- **tiktok:user**
|
||||
- **TLC**
|
||||
- **TMZ**
|
||||
- **TNAFlix**
|
||||
|
@ -1501,7 +1508,7 @@ # Supported sites
|
|||
- **tv2play.hu**
|
||||
- **tv2playseries.hu**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TV5MONDE**
|
||||
- **tv5unis**
|
||||
- **tv5unis:video**
|
||||
- **tv8.it**
|
||||
|
@ -1639,8 +1646,6 @@ # Supported sites
|
|||
- **voicy**: (**Currently broken**)
|
||||
- **voicy:channel**: (**Currently broken**)
|
||||
- **VolejTV**
|
||||
- **Voot**: [*voot*](## "netrc machine") (**Currently broken**)
|
||||
- **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**)
|
||||
- **VoxMedia**
|
||||
- **VoxMediaVolume**
|
||||
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
|
||||
|
@ -1715,10 +1720,10 @@ # Supported sites
|
|||
- **wykop:post:comment**
|
||||
- **Xanimu**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
|
||||
- **XHamster**
|
||||
- **XHamsterEmbed**
|
||||
- **XHamsterUser**
|
||||
- **XiaoHongShu**: 小红书
|
||||
- **ximalaya**: 喜马拉雅FM
|
||||
- **ximalaya:album**: 喜马拉雅FM 专辑
|
||||
- **xinpianchang**: xinpianchang.com (**Currently broken**)
|
||||
|
@ -1749,8 +1754,12 @@ # Supported sites
|
|||
- **YouNowLive**
|
||||
- **YouNowMoment**
|
||||
- **YouPorn**
|
||||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **YouPornCategory**: YouPorn category, with sorting, filtering and pagination
|
||||
- **YouPornChannel**: YouPorn channel, with sorting and pagination
|
||||
- **YouPornCollection**: YouPorn collection (user playlist), with sorting and pagination
|
||||
- **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination
|
||||
- **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination
|
||||
- **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination
|
||||
- **youtube**: YouTube
|
||||
- **youtube:clip**
|
||||
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import functools
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
@ -10,7 +9,9 @@
|
|||
|
||||
@pytest.fixture
|
||||
def handler(request):
|
||||
RH_KEY = request.param
|
||||
RH_KEY = getattr(request, 'param', None)
|
||||
if not RH_KEY:
|
||||
return
|
||||
if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
|
||||
handler = RH_KEY
|
||||
elif RH_KEY in _REQUEST_HANDLERS:
|
||||
|
@ -18,9 +19,46 @@ def handler(request):
|
|||
else:
|
||||
pytest.skip(f'{RH_KEY} request handler is not available')
|
||||
|
||||
return functools.partial(handler, logger=FakeLogger)
|
||||
class HandlerWrapper(handler):
|
||||
RH_KEY = handler.RH_KEY
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(logger=FakeLogger, *args, **kwargs)
|
||||
|
||||
return HandlerWrapper
|
||||
|
||||
|
||||
def validate_and_send(rh, req):
|
||||
rh.validate(req)
|
||||
return rh.send(req)
|
||||
@pytest.fixture(autouse=True)
def skip_handler(request, handler):
    """usage: pytest.mark.skip_handler('my_handler', 'reason')

    Skip the running test when one of its ``skip_handler`` markers names the
    request handler the test is parametrized with. The reason is optional.
    """
    if not handler:
        # The test was not parametrized with a request handler, so no marker can match
        return
    for marker in request.node.iter_markers('skip_handler'):
        if marker.args[0] == handler.RH_KEY:
            pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def skip_handler_if(request, handler):
    """usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')

    Skip the running test when a ``skip_handler_if`` marker names the request
    handler in use and its condition callable returns a truthy value for
    ``request``. The reason is optional.
    """
    if not handler:
        # The test was not parametrized with a request handler, so no marker can match
        return
    for marker in request.node.iter_markers('skip_handler_if'):
        if marker.args[0] == handler.RH_KEY and marker.args[1](request):
            pytest.skip(marker.args[2] if len(marker.args) > 2 else '')
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def skip_handlers_if(request, handler):
    """usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')

    For every ``skip_handlers_if`` marker on the test, call its condition with
    ``(request, handler)`` and skip the test on the first truthy result.
    Nothing is evaluated when no handler is in use.
    """
    if not handler:
        return
    for mark in request.node.iter_markers('skip_handlers_if'):
        condition = mark.args[0]
        if not condition(request, handler):
            continue
        reason = mark.args[1] if len(mark.args) > 1 else ''
        pytest.skip(reason)
|
||||
|
||||
|
||||
def pytest_configure(config):
    """Register the handler-skipping markers so they are known to pytest.

    The text before the colon is the marker signature shown by
    ``pytest --markers``; it mirrors the argument order each marker is
    actually called with.
    """
    marker_lines = (
        'skip_handler(handler, reason): skip test for the given handler',
        'skip_handler_if(handler, condition, reason): skip test for the given handler if condition is true',
        'skip_handlers_if(condition, reason): skip test for handlers when the condition is true',
    )
    for line in marker_lines:
        config.addinivalue_line('markers', line)
|
||||
|
|
|
@ -338,3 +338,8 @@ def http_server_port(httpd):
|
|||
def verify_address_availability(address):
|
||||
if find_available_port(address) is None:
|
||||
pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
|
||||
|
||||
|
||||
def validate_and_send(rh, req):
    """Validate ``req`` with ``rh``, then send it and return the response.

    ``rh.send`` is only reached when ``rh.validate`` does not raise.
    """
    rh.validate(req)
    response = rh.send(req)
    return response
|
||||
|
|
|
@ -1912,7 +1912,7 @@ def test_search_nextjs_data(self):
|
|||
self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
|
||||
with self.assertRaises(DeprecationWarning):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
||||
|
||||
|
||||
|
|
380
test/test_http_proxy.py
Normal file
380
test/test_http_proxy.py
Normal file
|
@ -0,0 +1,380 @@
|
|||
import abc
|
||||
import base64
|
||||
import contextlib
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import ssl
|
||||
import threading
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
from socketserver import ThreadingTCPServer
|
||||
|
||||
import pytest
|
||||
|
||||
from test.helper import http_server_port, verify_address_availability
|
||||
from test.test_networking import TEST_DIR
|
||||
from test.test_socks import IPv6ThreadingTCPServer
|
||||
from yt_dlp.dependencies import urllib3
|
||||
from yt_dlp.networking import Request
|
||||
from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError
|
||||
|
||||
|
||||
class HTTPProxyAuthMixin:
    """HTTP Basic proxy authentication for ``BaseHTTPRequestHandler``-style classes.

    The host class must provide ``headers``, ``send_response``, ``send_header``
    and ``end_headers``.
    """

    def proxy_auth_error(self):
        """Send a 407 challenge and return False, so callers can ``return self.proxy_auth_error()``."""
        self.send_response(407)
        self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
        self.end_headers()
        return False

    def do_proxy_auth(self, username, password):
        """Check the request's ``Proxy-Authorization`` header against the expected credentials.

        Returns True when no credentials are required (both arguments are None)
        or when the supplied Basic credentials match. Otherwise a 407 response
        is written and False is returned.
        """
        if username is None and password is None:
            return True

        proxy_auth_header = self.headers.get('Proxy-Authorization', None)
        if proxy_auth_header is None:
            return self.proxy_auth_error()

        # Authentication scheme names are case-insensitive, so accept "basic"/"BASIC" too
        scheme, _, auth = proxy_auth_header.partition(' ')
        if scheme.lower() != 'basic':
            return self.proxy_auth_error()

        try:
            # Split on the first colon only: the password itself may contain colons
            auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1)
        except Exception:
            return self.proxy_auth_error()

        if auth_username != (username or '') or auth_password != (password or ''):
            return self.proxy_auth_error()
        return True
|
||||
|
||||
|
||||
class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    """Plain HTTP proxy endpoint that reports how it was reached.

    A GET whose path ends in ``/proxy_info`` is answered with a JSON document
    describing the proxied request (or the ``proxy_info`` supplied at
    construction); any other path gets a 404.
    """

    def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
        # request_handler is accepted but not used here, so this class can be
        # constructed with the same keyword arguments as the CONNECT handlers.
        # State is assigned before super().__init__() because the base class
        # services the request from inside its constructor.
        self.username = username
        self.password = password
        self.proxy_info = proxy_info
        super().__init__(*args, **kwargs)

    def do_GET(self):
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        if self.path.endswith('/proxy_info'):
            # Encode once so Content-Length is the number of bytes actually written
            payload = json.dumps(self.proxy_info or {
                'client_address': self.client_address,
                'connect': False,
                'connect_host': None,
                'connect_port': None,
                'headers': dict(self.headers),
                'path': self.path,
                'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
            }).encode()
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)
        else:
            self.send_response(404)
            self.end_headers()

        self.server.close_request(self.request)
|
||||
|
||||
|
||||
if not urllib3:
    SSLTransport = None
else:
    import urllib3.util.ssltransport

    class SSLTransport(urllib3.util.ssltransport.SSLTransport):
        """
        Modified version of urllib3 SSLTransport to support server side SSL

        This allows us to chain multiple TLS connections.
        """

        def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
            # The parent constructor is deliberately not called: everything it
            # would set up is built here so that server_side can be passed to wrap_bio().
            self.socket = socket
            self.suppress_ragged_eofs = suppress_ragged_eofs

            self.incoming = ssl.MemoryBIO()
            self.outgoing = ssl.MemoryBIO()
            self.sslobj = ssl_context.wrap_bio(
                self.incoming, self.outgoing, server_hostname=server_hostname, server_side=server_side)

            # Drive the TLS handshake to completion over the wrapped socket
            self._ssl_io_loop(self.sslobj.do_handshake)

        # _io_refs is forwarded to the underlying socket in both directions
        @property
        def _io_refs(self):
            return self.socket._io_refs

        @_io_refs.setter
        def _io_refs(self, value):
            self.socket._io_refs = value

        def shutdown(self, *args, **kwargs):
            """Delegate shutdown to the underlying socket."""
            self.socket.shutdown(*args, **kwargs)
|
||||
|
||||
|
||||
class HTTPSProxyHandler(HTTPProxyHandler):
    """``HTTPProxyHandler`` served over TLS using the test certificate."""

    def __init__(self, request, *args, **kwargs):
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)

        if not isinstance(request, ssl.SSLSocket):
            secured = tls_context.wrap_socket(request, server_side=True)
        else:
            # Already a TLS socket: layer a second TLS session on top of it
            secured = SSLTransport(request, ssl_context=tls_context, server_side=True)

        super().__init__(secured, *args, **kwargs)
|
||||
|
||||
|
||||
class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    """HTTP CONNECT proxy that hands the tunnelled connection to ``request_handler``.

    After a successful CONNECT, ``request_handler`` is instantiated on the same
    socket and receives a ``proxy_info`` dict describing the CONNECT request.
    """

    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'

    def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
        # State is assigned before super().__init__() because the base class
        # services the request from inside its constructor.
        self.username = username
        self.password = password
        self.request_handler = request_handler
        super().__init__(*args, **kwargs)

    def do_CONNECT(self):
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        self.send_response(200)
        self.end_headers()
        # Split on the LAST colon so bracketed IPv6 targets such as "[::1]:443"
        # are handled; the host is reported exactly as it appeared in the request.
        connect_host, _, connect_port = self.path.rpartition(':')
        proxy_info = {
            'client_address': self.client_address,
            'connect': True,
            'connect_host': connect_host,
            'connect_port': int(connect_port),
            'headers': dict(self.headers),
            'path': self.path,
            'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
        }
        self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
        self.server.close_request(self.request)
|
||||
|
||||
|
||||
class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
    """``HTTPConnectProxyHandler`` whose client-facing side is wrapped in TLS."""

    def __init__(self, request, *args, **kwargs):
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        tls_context.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None)
        tls_socket = tls_context.wrap_socket(request, server_side=True)
        # Remembered so do_CONNECT can close the TLS socket once the tunnel is finished
        self._original_request = tls_socket
        super().__init__(tls_socket, *args, **kwargs)

    def do_CONNECT(self):
        super().do_CONNECT()
        self.server.close_request(self._original_request)
|
||||
|
||||
|
||||
@contextlib.contextmanager
def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
    """Run a proxy server in a daemon thread for the duration of the ``with`` block.

    ``proxy_server_class`` is instantiated per connection with
    ``request_handler`` and ``proxy_server_kwargs`` as keyword arguments.
    The server binds to ``bind_ip`` (default ``127.0.0.1``) on an ephemeral
    port. Yields the ``host:port`` string of the listening server, with the
    host in brackets when the bind address is not dotted IPv4.
    """
    server = server_thread = None
    try:
        bind_address = bind_ip or '127.0.0.1'
        is_ipv4 = '.' in bind_address
        server_type = ThreadingTCPServer if is_ipv4 else IPv6ThreadingTCPServer
        server = server_type(
            (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        if is_ipv4:
            yield f'{bind_address}:{server_port}'
        else:
            yield f'[{bind_address}]:{server_port}'
    finally:
        # Setup may have failed part-way; only undo what actually happened so the
        # original exception is not masked. shutdown() waits for serve_forever()
        # to exit and join() requires a started thread, so both are skipped
        # unless the thread was really started.
        thread_started = server_thread is not None and server_thread.ident is not None
        if thread_started:
            server.shutdown()
        if server is not None:
            server.server_close()
        if thread_started:
            server_thread.join(2.0)
|
||||
|
||||
|
||||
class HTTPProxyTestContext(abc.ABC):
    """Base for per-protocol test contexts.

    Subclasses set ``REQUEST_HANDLER_CLASS`` (passed to ``proxy_server`` as
    the request handler) and ``REQUEST_PROTO``, and implement
    ``proxy_info_request``.
    """

    REQUEST_HANDLER_CLASS = None
    REQUEST_PROTO = None

    def http_server(self, server_class, *args, **kwargs):
        """Return a ``proxy_server`` context manager for ``server_class`` using this context's request handler."""
        request_handler = self.REQUEST_HANDLER_CLASS
        return proxy_server(server_class, request_handler, *args, **kwargs)

    @abc.abstractmethod
    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """return a dict of proxy_info"""
|
||||
|
||||
|
||||
class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
    # Standard HTTP Proxy for http requests
    REQUEST_HANDLER_CLASS = HTTPProxyHandler
    REQUEST_PROTO = 'http'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request ``/proxy_info`` over http through ``handler`` and return the decoded JSON.

        The target defaults to ``127.0.0.1:40000``; extra keyword arguments
        are passed to ``Request``.
        """
        request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
        handler.validate(request)
        response = handler.send(request)
        try:
            return json.loads(response.read().decode())
        finally:
            # Always release the response, even if the body is not valid JSON
            response.close()
|
||||
|
||||
|
||||
class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
    # HTTP Connect proxy, for https requests
    REQUEST_HANDLER_CLASS = HTTPSProxyHandler
    REQUEST_PROTO = 'https'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        """Request ``/proxy_info`` over https through ``handler`` and return the decoded JSON.

        The target defaults to ``127.0.0.1:40000``; extra keyword arguments
        are passed to ``Request``.
        """
        request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
        handler.validate(request)
        response = handler.send(request)
        try:
            return json.loads(response.read().decode())
        finally:
            # Always release the response, even if the body is not valid JSON
            response.close()
|
||||
|
||||
|
||||
CTX_MAP = {
|
||||
'http': HTTPProxyHTTPTestContext,
|
||||
'https': HTTPProxyHTTPSTestContext,
|
||||
}
|
||||
|
||||
|
||||
@pytest.fixture(scope='module')
def ctx(request):
    """Build the test context registered in ``CTX_MAP`` under the fixture parameter."""
    context_class = CTX_MAP[request.param]
    return context_class()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http
|
||||
class TestHTTPProxy:
|
||||
def test_http_no_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPProxyHandler) as server_address:
|
||||
with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['connect'] is False
|
||||
assert 'Proxy-Authorization' not in proxy_info['headers']
|
||||
|
||||
def test_http_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
|
||||
with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert 'Proxy-Authorization' in proxy_info['headers']
|
||||
|
||||
def test_http_bad_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
|
||||
with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
|
||||
with pytest.raises(HTTPError) as exc_info:
|
||||
ctx.proxy_info_request(rh)
|
||||
assert exc_info.value.response.status == 407
|
||||
exc_info.value.response.close()
|
||||
|
||||
def test_http_source_address(self, handler, ctx):
|
||||
with ctx.http_server(HTTPProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
|
||||
source_address=source_address) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['client_address'][0] == source_address
|
||||
|
||||
@pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
|
||||
def test_https(self, handler, ctx):
|
||||
with ctx.http_server(HTTPSProxyHandler) as server_address:
|
||||
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['connect'] is False
|
||||
assert 'Proxy-Authorization' not in proxy_info['headers']
|
||||
|
||||
@pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
|
||||
def test_https_verify_failed(self, handler, ctx):
|
||||
with ctx.http_server(HTTPSProxyHandler) as server_address:
|
||||
with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
|
||||
# Accept SSLError as may not be feasible to tell if it is proxy or request error.
|
||||
# note: if request proto also does ssl verification, this may also be the error of the request.
|
||||
# Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
|
||||
with pytest.raises((ProxyError, SSLError)):
|
||||
ctx.proxy_info_request(rh)
|
||||
|
||||
def test_http_with_idn(self, handler, ctx):
|
||||
with ctx.http_server(HTTPProxyHandler) as server_address:
|
||||
with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw')
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['path'].startswith('http://xn--fiq228c.tw')
|
||||
assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'handler,ctx', [
|
||||
('Requests', 'https'),
|
||||
('CurlCFFI', 'https'),
|
||||
], indirect=True)
|
||||
class TestHTTPConnectProxy:
|
||||
def test_http_connect_no_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPConnectProxyHandler) as server_address:
|
||||
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['connect'] is True
|
||||
assert 'Proxy-Authorization' not in proxy_info['headers']
|
||||
|
||||
def test_http_connect_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
|
||||
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert 'Proxy-Authorization' in proxy_info['headers']
|
||||
|
||||
@pytest.mark.skip_handler(
|
||||
'Requests',
|
||||
'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374'
|
||||
)
|
||||
def test_http_connect_bad_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
|
||||
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
|
||||
with pytest.raises(ProxyError):
|
||||
ctx.proxy_info_request(rh)
|
||||
|
||||
def test_http_connect_source_address(self, handler, ctx):
|
||||
with ctx.http_server(HTTPConnectProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
|
||||
source_address=source_address,
|
||||
verify=False) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['client_address'][0] == source_address
|
||||
|
||||
@pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
|
||||
def test_https_connect_proxy(self, handler, ctx):
|
||||
with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
|
||||
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert proxy_info['connect'] is True
|
||||
assert 'Proxy-Authorization' not in proxy_info['headers']
|
||||
|
||||
@pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
|
||||
def test_https_connect_verify_failed(self, handler, ctx):
|
||||
with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
|
||||
with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
|
||||
# Accept SSLError as may not be feasible to tell if it is proxy or request error.
|
||||
# note: if request proto also does ssl verification, this may also be the error of the request.
|
||||
# Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
|
||||
with pytest.raises((ProxyError, SSLError)):
|
||||
ctx.proxy_info_request(rh)
|
||||
|
||||
@pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
|
||||
def test_https_connect_proxy_auth(self, handler, ctx):
|
||||
with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
|
||||
with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh:
|
||||
proxy_info = ctx.proxy_info_request(rh)
|
||||
assert proxy_info['proxy'] == server_address
|
||||
assert 'Proxy-Authorization' in proxy_info['headers']
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import gzip
|
||||
|
@ -27,8 +29,12 @@
|
|||
from email.message import Message
|
||||
from http.cookiejar import CookieJar
|
||||
|
||||
from test.conftest import validate_and_send
|
||||
from test.helper import FakeYDL, http_server_port, verify_address_availability
|
||||
from test.helper import (
|
||||
FakeYDL,
|
||||
http_server_port,
|
||||
validate_and_send,
|
||||
verify_address_availability,
|
||||
)
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
||||
from yt_dlp.networking import (
|
||||
|
@ -62,21 +68,6 @@
|
|||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def _build_proxy_handler(name):
|
||||
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
proxy_name = name
|
||||
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Type', 'text/plain; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
|
||||
return HTTPTestRequestHandler
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
|
||||
protocol_version = 'HTTP/1.1'
|
||||
default_request_version = 'HTTP/1.1'
|
||||
|
@ -317,8 +308,9 @@ def setup_class(cls):
|
|||
cls.https_server_thread.start()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
|
||||
def test_verify_cert(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(CertificateVerifyError):
|
||||
|
@ -329,7 +321,6 @@ def test_verify_cert(self, handler):
|
|||
assert r.status == 200
|
||||
r.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_ssl_error(self, handler):
|
||||
# HTTPS server with too old TLS version
|
||||
# XXX: is there a better way to test this than to create a new server?
|
||||
|
@ -347,7 +338,6 @@ def test_ssl_error(self, handler):
|
|||
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
|
||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_percent_encode(self, handler):
|
||||
with handler() as rh:
|
||||
# Unicode characters should be encoded with uppercase percent-encoding
|
||||
|
@ -359,7 +349,6 @@ def test_percent_encode(self, handler):
|
|||
assert res.status == 200
|
||||
res.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('path', [
|
||||
'/a/b/./../../headers',
|
||||
'/redirect_dotsegments',
|
||||
|
@ -375,15 +364,13 @@ def test_remove_dot_segments(self, handler, path):
|
|||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res.close()
|
||||
|
||||
# Not supported by CurlCFFI (non-standard)
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
|
||||
def test_unicode_path_redirection(self, handler):
|
||||
with handler() as rh:
|
||||
r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
|
||||
assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
|
||||
r.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_raise_http_error(self, handler):
|
||||
with handler() as rh:
|
||||
for bad_status in (400, 500, 599, 302):
|
||||
|
@ -393,7 +380,6 @@ def test_raise_http_error(self, handler):
|
|||
# Should not raise an error
|
||||
validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_response_url(self, handler):
|
||||
with handler() as rh:
|
||||
# Response url should be that of the last url in redirect chain
|
||||
|
@ -405,7 +391,6 @@ def test_response_url(self, handler):
|
|||
res2.close()
|
||||
|
||||
# Covers some basic cases we expect some level of consistency between request handlers for
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.parametrize('redirect_status,method,expected', [
|
||||
# A 303 must either use GET or HEAD for subsequent request
|
||||
(303, 'POST', ('', 'GET', False)),
|
||||
|
@ -447,7 +432,6 @@ def test_redirect(self, handler, redirect_status, method, expected):
|
|||
assert expected[1] == res.headers.get('method')
|
||||
assert expected[2] == ('content-length' in headers.decode().lower())
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_request_cookie_header(self, handler):
|
||||
# We should accept a Cookie header being passed as in normal headers and handle it appropriately.
|
||||
with handler() as rh:
|
||||
|
@ -480,19 +464,16 @@ def test_request_cookie_header(self, handler):
|
|||
assert b'cookie: test=ytdlp' not in data.lower()
|
||||
assert b'cookie: test=test3' in data.lower()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_redirect_loop(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(HTTPError, match='redirect loop'):
|
||||
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_incompleteread(self, handler):
|
||||
with handler(timeout=2) as rh:
|
||||
with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
|
||||
validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_cookies(self, handler):
|
||||
cookiejar = YoutubeDLCookieJar()
|
||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||
|
@ -509,7 +490,6 @@ def test_cookies(self, handler):
|
|||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
|
||||
assert b'cookie: test=ytdlp' in data.lower()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_headers(self, handler):
|
||||
|
||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||
|
@ -525,7 +505,6 @@ def test_headers(self, handler):
|
|||
assert b'test2: test2' not in data
|
||||
assert b'test3: test3' in data
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_read_timeout(self, handler):
|
||||
with handler() as rh:
|
||||
# Default timeout is 20 seconds, so this should go through
|
||||
|
@ -541,26 +520,21 @@ def test_read_timeout(self, handler):
|
|||
validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_connect_timeout(self, handler):
|
||||
# nothing should be listening on this port
|
||||
connect_timeout_url = 'http://10.255.255.255'
|
||||
with handler(timeout=0.01) as rh:
|
||||
with handler(timeout=0.01) as rh, pytest.raises(TransportError):
|
||||
now = time.time()
|
||||
with pytest.raises(TransportError):
|
||||
validate_and_send(
|
||||
rh, Request(connect_timeout_url))
|
||||
assert 0.01 <= time.time() - now < 20
|
||||
validate_and_send(rh, Request(connect_timeout_url))
|
||||
assert time.time() - now < DEFAULT_TIMEOUT
|
||||
|
||||
with handler() as rh:
|
||||
with pytest.raises(TransportError):
|
||||
# Per request timeout, should override handler timeout
|
||||
now = time.time()
|
||||
validate_and_send(
|
||||
rh, Request(connect_timeout_url, extensions={'timeout': 0.01}))
|
||||
assert 0.01 <= time.time() - now < 20
|
||||
# Per request timeout, should override handler timeout
|
||||
request = Request(connect_timeout_url, extensions={'timeout': 0.01})
|
||||
with handler() as rh, pytest.raises(TransportError):
|
||||
now = time.time()
|
||||
validate_and_send(rh, request)
|
||||
assert time.time() - now < DEFAULT_TIMEOUT
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
# on some systems these loopback addresses we need for testing may not be available
|
||||
|
@ -572,13 +546,13 @@ def test_source_address(self, handler):
|
|||
assert source_address == data
|
||||
|
||||
# Not supported by CurlCFFI
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
|
||||
def test_gzip_trailing_garbage(self, handler):
|
||||
with handler() as rh:
|
||||
data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
|
||||
assert data == '<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
|
||||
@pytest.mark.skipif(not brotli, reason='brotli support is not installed')
|
||||
def test_brotli(self, handler):
|
||||
with handler() as rh:
|
||||
|
@ -589,7 +563,6 @@ def test_brotli(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'br'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_deflate(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -599,7 +572,6 @@ def test_deflate(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'deflate'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_gzip(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -609,7 +581,6 @@ def test_gzip(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'gzip'
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_multiple_encodings(self, handler):
|
||||
with handler() as rh:
|
||||
for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
|
||||
|
@ -620,8 +591,7 @@ def test_multiple_encodings(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == pair
|
||||
assert res.read() == b'<html><video src="/vid.mp4" /></html>'
|
||||
|
||||
# Not supported by curl_cffi
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
@pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
|
||||
def test_unsupported_encoding(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -631,7 +601,6 @@ def test_unsupported_encoding(self, handler):
|
|||
assert res.headers.get('Content-Encoding') == 'unsupported'
|
||||
assert res.read() == b'raw'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_read(self, handler):
|
||||
with handler() as rh:
|
||||
res = validate_and_send(
|
||||
|
@ -642,83 +611,48 @@ def test_read(self, handler):
|
|||
assert res.read().decode().endswith('\n\n')
|
||||
assert res.read() == b''
|
||||
|
||||
def test_request_disable_proxy(self, handler):
|
||||
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||
# Given the handler is configured with a proxy
|
||||
with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||
# When a proxy is explicitly set to None for the request
|
||||
res = validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None}))
|
||||
# Then no proxy should be used
|
||||
res.close()
|
||||
assert res.status == 200
|
||||
|
||||
class TestHTTPProxy(TestRequestHandlerBase):
|
||||
# Note: this only tests http urls over non-CONNECT proxy
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
super().setup_class()
|
||||
# HTTP Proxy server
|
||||
cls.proxy = http.server.ThreadingHTTPServer(
|
||||
('127.0.0.1', 0), _build_proxy_handler('normal'))
|
||||
cls.proxy_port = http_server_port(cls.proxy)
|
||||
cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
|
||||
cls.proxy_thread.daemon = True
|
||||
cls.proxy_thread.start()
|
||||
|
||||
# Geo proxy server
|
||||
cls.geo_proxy = http.server.ThreadingHTTPServer(
|
||||
('127.0.0.1', 0), _build_proxy_handler('geo'))
|
||||
cls.geo_port = http_server_port(cls.geo_proxy)
|
||||
cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
|
||||
cls.geo_proxy_thread.daemon = True
|
||||
cls.geo_proxy_thread.start()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_http_proxy(self, handler):
|
||||
http_proxy = f'http://127.0.0.1:{self.proxy_port}'
|
||||
geo_proxy = f'http://127.0.0.1:{self.geo_port}'
|
||||
|
||||
# Test global http proxy
|
||||
# Test per request http proxy
|
||||
# Test per request http proxy disables proxy
|
||||
url = 'http://foo.com/bar'
|
||||
|
||||
# Global HTTP proxy
|
||||
with handler(proxies={'http': http_proxy}) as rh:
|
||||
res = validate_and_send(rh, Request(url)).read().decode()
|
||||
assert res == f'normal: {url}'
|
||||
|
||||
# Per request proxy overrides global
|
||||
res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
|
||||
assert res == f'geo: {url}'
|
||||
|
||||
# and setting to None disables all proxies for that request
|
||||
real_url = f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res = validate_and_send(
|
||||
rh, Request(real_url, proxies={'http': None})).read().decode()
|
||||
assert res != f'normal: {real_url}'
|
||||
assert 'Accept' in res
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.skip_handlers_if(
|
||||
lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
|
||||
def test_noproxy(self, handler):
|
||||
with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
|
||||
# NO_PROXY
|
||||
for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
|
||||
nop_response = validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
|
||||
'utf-8')
|
||||
assert 'Accept' in nop_response
|
||||
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||
# Given the handler is configured with a proxy
|
||||
with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||
for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
|
||||
# When request no proxy includes the request url host
|
||||
nop_response = validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
|
||||
# Then the proxy should not be used
|
||||
assert nop_response.status == 200
|
||||
nop_response.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
@pytest.mark.skip_handlers_if(
|
||||
lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
|
||||
def test_allproxy(self, handler):
|
||||
url = 'http://foo.com/bar'
|
||||
with handler() as rh:
|
||||
response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
|
||||
'utf-8')
|
||||
assert response == f'normal: {url}'
|
||||
# This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
|
||||
# 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
|
||||
with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
|
||||
with pytest.raises(TransportError):
|
||||
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_http_proxy_with_idn(self, handler):
|
||||
with handler(proxies={
|
||||
'http': f'http://127.0.0.1:{self.proxy_port}',
|
||||
}) as rh:
|
||||
url = 'http://中文.tw/'
|
||||
response = rh.send(Request(url)).read().decode()
|
||||
# b'xn--fiq228c' is '中文'.encode('idna')
|
||||
assert response == 'normal: http://xn--fiq228c.tw/'
|
||||
with handler(timeout=0.1) as rh:
|
||||
with pytest.raises(TransportError):
|
||||
validate_and_send(
|
||||
rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
class TestClientCertificate:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
|
@ -745,27 +679,23 @@ def _run_test(self, handler, **handler_kwargs):
|
|||
) as rh:
|
||||
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_certificate_combined_nopass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_certificate_nocombined_nopass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_certificate_combined_pass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
|
||||
'client_certificate_password': 'foobar',
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_certificate_nocombined_pass(self, handler):
|
||||
self._run_test(handler, client_cert={
|
||||
'client_certificate': os.path.join(self.certdir, 'client.crt'),
|
||||
|
@ -824,8 +754,8 @@ def test_remove_logging_handler(self, handler, logger_name):
|
|||
assert len(logging_handlers) == before_count
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
class TestUrllibRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
def test_file_urls(self, handler):
|
||||
# See https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||
tf = tempfile.NamedTemporaryFile(delete=False)
|
||||
|
@ -847,7 +777,6 @@ def test_file_urls(self, handler):
|
|||
|
||||
os.unlink(tf.name)
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
def test_http_error_returns_content(self, handler):
|
||||
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
|
||||
def get_response():
|
||||
|
@ -860,7 +789,6 @@ def get_response():
|
|||
|
||||
assert get_response().read() == b'<html></html>'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
def test_verify_cert_error_text(self, handler):
|
||||
# Check the output of the error message
|
||||
with handler() as rh:
|
||||
|
@ -870,7 +798,6 @@ def test_verify_cert_error_text(self, handler):
|
|||
):
|
||||
validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
@pytest.mark.parametrize('req,match,version_check', [
|
||||
# https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
|
||||
# bpo-39603: Check implemented in 3.7.9+, 3.8.5+
|
||||
|
@ -1202,7 +1129,7 @@ class HTTPSupportedRH(ValidationRH):
|
|||
]
|
||||
|
||||
PROXY_SCHEME_TESTS = [
|
||||
# scheme, expected to fail
|
||||
# proxy scheme, expected to fail
|
||||
('Urllib', 'http', [
|
||||
('http', False),
|
||||
('https', UnsupportedRequest),
|
||||
|
@ -1228,30 +1155,41 @@ class HTTPSupportedRH(ValidationRH):
|
|||
('socks5', False),
|
||||
('socks5h', False),
|
||||
]),
|
||||
('Websockets', 'ws', [
|
||||
('http', UnsupportedRequest),
|
||||
('https', UnsupportedRequest),
|
||||
('socks4', False),
|
||||
('socks4a', False),
|
||||
('socks5', False),
|
||||
('socks5h', False),
|
||||
]),
|
||||
(NoCheckRH, 'http', [('http', False)]),
|
||||
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
|
||||
('Websockets', 'ws', [('http', UnsupportedRequest)]),
|
||||
(NoCheckRH, 'http', [('http', False)]),
|
||||
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
|
||||
]
|
||||
|
||||
PROXY_KEY_TESTS = [
|
||||
# key, expected to fail
|
||||
('Urllib', [
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
# proxy key, proxy scheme, expected to fail
|
||||
('Urllib', 'http', [
|
||||
('all', 'http', False),
|
||||
('unrelated', 'http', False),
|
||||
]),
|
||||
('Requests', [
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
('Requests', 'http', [
|
||||
('all', 'http', False),
|
||||
('unrelated', 'http', False),
|
||||
]),
|
||||
('CurlCFFI', [
|
||||
('all', False),
|
||||
('unrelated', False),
|
||||
('CurlCFFI', 'http', [
|
||||
('all', 'http', False),
|
||||
('unrelated', 'http', False),
|
||||
]),
|
||||
(NoCheckRH, [('all', False)]),
|
||||
(HTTPSupportedRH, [('all', UnsupportedRequest)]),
|
||||
(HTTPSupportedRH, [('no', UnsupportedRequest)]),
|
||||
('Websockets', 'ws', [
|
||||
('all', 'socks5', False),
|
||||
('unrelated', 'socks5', False),
|
||||
]),
|
||||
(NoCheckRH, 'http', [('all', 'http', False)]),
|
||||
(HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
|
||||
(HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
|
||||
]
|
||||
|
||||
EXTENSION_TESTS = [
|
||||
|
@ -1293,28 +1231,54 @@ class HTTPSupportedRH(ValidationRH):
|
|||
]),
|
||||
]
|
||||
|
||||
@pytest.mark.parametrize('handler,fail,scheme', [
|
||||
('Urllib', False, 'http'),
|
||||
('Requests', False, 'http'),
|
||||
('CurlCFFI', False, 'http'),
|
||||
('Websockets', False, 'ws')
|
||||
], indirect=['handler'])
|
||||
def test_no_proxy(self, handler, fail, scheme):
|
||||
run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
|
||||
run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
|
||||
|
||||
@pytest.mark.parametrize('handler,scheme', [
|
||||
('Urllib', 'http'),
|
||||
(HTTPSupportedRH, 'http'),
|
||||
('Requests', 'http'),
|
||||
('CurlCFFI', 'http'),
|
||||
('Websockets', 'ws')
|
||||
], indirect=['handler'])
|
||||
def test_empty_proxy(self, handler, scheme):
|
||||
run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
|
||||
run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})
|
||||
|
||||
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
||||
@pytest.mark.parametrize('handler,scheme', [
|
||||
('Urllib', 'http'),
|
||||
(HTTPSupportedRH, 'http'),
|
||||
('Requests', 'http'),
|
||||
('CurlCFFI', 'http'),
|
||||
('Websockets', 'ws')
|
||||
], indirect=['handler'])
|
||||
def test_invalid_proxy_url(self, handler, scheme, proxy_url):
|
||||
run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))
|
||||
|
||||
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
|
||||
(handler_tests[0], scheme, fail, handler_kwargs)
|
||||
for handler_tests in URL_SCHEME_TESTS
|
||||
for scheme, fail, handler_kwargs in handler_tests[1]
|
||||
|
||||
], indirect=['handler'])
|
||||
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
||||
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
||||
|
||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler'])
|
||||
def test_no_proxy(self, handler, fail):
|
||||
run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
|
||||
run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
|
||||
|
||||
@pytest.mark.parametrize('handler,proxy_key,fail', [
|
||||
(handler_tests[0], proxy_key, fail)
|
||||
@pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
|
||||
(handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
|
||||
for handler_tests in PROXY_KEY_TESTS
|
||||
for proxy_key, fail in handler_tests[1]
|
||||
for proxy_key, proxy_scheme, fail in handler_tests[2]
|
||||
], indirect=['handler'])
|
||||
def test_proxy_key(self, handler, proxy_key, fail):
|
||||
run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
|
||||
run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
|
||||
def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
|
||||
run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
|
||||
run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})
|
||||
|
||||
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
|
||||
(handler_tests[0], handler_tests[1], scheme, fail)
|
||||
|
@ -1325,16 +1289,6 @@ def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
|
|||
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
|
||||
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_empty_proxy(self, handler):
|
||||
run_validation(handler, False, Request('http://', proxies={'http': None}))
|
||||
run_validation(handler, False, Request('http://'), proxies={'http': None})
|
||||
|
||||
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
def test_invalid_proxy_url(self, handler, proxy_url):
|
||||
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
|
||||
|
||||
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
|
||||
(handler_tests[0], handler_tests[1], extensions, fail)
|
||||
for handler_tests in EXTENSION_TESTS
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
import sys
|
||||
import unittest
|
||||
import warnings
|
||||
import datetime as dt
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
@ -27,6 +28,7 @@
|
|||
ExtractorError,
|
||||
InAdvancePagedList,
|
||||
LazyList,
|
||||
NO_DEFAULT,
|
||||
OnDemandPagedList,
|
||||
Popen,
|
||||
age_restricted,
|
||||
|
@ -768,6 +770,11 @@ def test_encode_compat_str(self):
|
|||
|
||||
def test_parse_iso8601(self):
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None)
|
||||
# default does not override timezone in date_str
|
||||
self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266)
|
||||
self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266)
|
||||
|
|
|
@ -3,10 +3,12 @@
|
|||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from test.helper import verify_address_availability
|
||||
from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
@ -18,7 +20,7 @@
|
|||
import ssl
|
||||
import threading
|
||||
|
||||
from yt_dlp import socks
|
||||
from yt_dlp import socks, traverse_obj
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import websockets
|
||||
from yt_dlp.networking import Request
|
||||
|
@ -114,6 +116,7 @@ def ws_validate_and_send(rh, req):
|
|||
|
||||
|
||||
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
class TestWebsSocketRequestHandlerConformance:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
|
@ -129,7 +132,6 @@ def setup_class(cls):
|
|||
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
||||
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_basic_websockets(self, handler):
|
||||
with handler() as rh:
|
||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||
|
@ -141,7 +143,6 @@ def test_basic_websockets(self, handler):
|
|||
|
||||
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
|
||||
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_send_types(self, handler, msg, opcode):
|
||||
with handler() as rh:
|
||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||
|
@ -149,7 +150,6 @@ def test_send_types(self, handler, msg, opcode):
|
|||
assert int(ws.recv()) == opcode
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_verify_cert(self, handler):
|
||||
with handler() as rh:
|
||||
with pytest.raises(CertificateVerifyError):
|
||||
|
@ -160,14 +160,12 @@ def test_verify_cert(self, handler):
|
|||
assert ws.status == 101
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_ssl_error(self, handler):
|
||||
with handler(verify=False) as rh:
|
||||
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
||||
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
@pytest.mark.parametrize('path,expected', [
|
||||
# Unicode characters should be encoded with uppercase percent-encoding
|
||||
('/中文', '/%E4%B8%AD%E6%96%87'),
|
||||
|
@ -182,7 +180,6 @@ def test_percent_encode(self, handler, path, expected):
|
|||
assert ws.status == 101
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_remove_dot_segments(self, handler):
|
||||
with handler() as rh:
|
||||
# This isn't a comprehensive test,
|
||||
|
@ -195,7 +192,6 @@ def test_remove_dot_segments(self, handler):
|
|||
|
||||
# We are restricted to known HTTP status codes in http.HTTPStatus
|
||||
# Redirects are not supported for websockets
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
|
||||
def test_raise_http_error(self, handler, status):
|
||||
with handler() as rh:
|
||||
|
@ -203,17 +199,30 @@ def test_raise_http_error(self, handler, status):
|
|||
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
|
||||
assert exc_info.value.status == status
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
@pytest.mark.parametrize('params,extensions', [
|
||||
({'timeout': sys.float_info.min}, {}),
|
||||
({}, {'timeout': sys.float_info.min}),
|
||||
])
|
||||
def test_timeout(self, handler, params, extensions):
|
||||
def test_read_timeout(self, handler, params, extensions):
|
||||
with handler(**params) as rh:
|
||||
with pytest.raises(TransportError):
|
||||
ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_connect_timeout(self, handler):
|
||||
# nothing should be listening on this port
|
||||
connect_timeout_url = 'ws://10.255.255.255'
|
||||
with handler(timeout=0.01) as rh, pytest.raises(TransportError):
|
||||
now = time.time()
|
||||
ws_validate_and_send(rh, Request(connect_timeout_url))
|
||||
assert time.time() - now < DEFAULT_TIMEOUT
|
||||
|
||||
# Per request timeout, should override handler timeout
|
||||
request = Request(connect_timeout_url, extensions={'timeout': 0.01})
|
||||
with handler() as rh, pytest.raises(TransportError):
|
||||
now = time.time()
|
||||
ws_validate_and_send(rh, request)
|
||||
assert time.time() - now < DEFAULT_TIMEOUT
|
||||
|
||||
def test_cookies(self, handler):
|
||||
cookiejar = YoutubeDLCookieJar()
|
||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||
|
@ -239,7 +248,6 @@ def test_cookies(self, handler):
|
|||
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
|
@ -249,7 +257,6 @@ def test_source_address(self, handler):
|
|||
assert source_address == ws.recv()
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_response_url(self, handler):
|
||||
with handler() as rh:
|
||||
url = f'{self.ws_base_url}/something'
|
||||
|
@ -257,7 +264,6 @@ def test_response_url(self, handler):
|
|||
assert ws.url == url
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_request_headers(self, handler):
|
||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||
# Global Headers
|
||||
|
@ -293,7 +299,6 @@ def test_request_headers(self, handler):
|
|||
'client_certificate_password': 'foobar',
|
||||
}
|
||||
))
|
||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_mtls(self, handler, client_cert):
|
||||
with handler(
|
||||
# Disable client-side validation of unacceptable self-signed testcert.pem
|
||||
|
@ -303,6 +308,44 @@ def test_mtls(self, handler, client_cert):
|
|||
) as rh:
|
||||
ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
|
||||
|
||||
def test_request_disable_proxy(self, handler):
|
||||
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
|
||||
# Given handler is configured with a proxy
|
||||
with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||
# When a proxy is explicitly set to None for the request
|
||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'http': None}))
|
||||
# Then no proxy should be used
|
||||
assert ws.status == 101
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.skip_handlers_if(
|
||||
lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
|
||||
def test_noproxy(self, handler):
|
||||
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
|
||||
# Given the handler is configured with a proxy
|
||||
with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||
for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'):
|
||||
# When request no proxy includes the request url host
|
||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy}))
|
||||
# Then the proxy should not be used
|
||||
assert ws.status == 101
|
||||
ws.close()
|
||||
|
||||
@pytest.mark.skip_handlers_if(
|
||||
lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
|
||||
def test_allproxy(self, handler):
|
||||
supported_proto = traverse_obj(handler._SUPPORTED_PROXY_SCHEMES, 0, default='ws')
|
||||
# This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
|
||||
# 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
|
||||
with handler(proxies={'all': f'{supported_proto}://10.255.255.255'}, timeout=0.1) as rh:
|
||||
with pytest.raises(TransportError):
|
||||
ws_validate_and_send(rh, Request(self.ws_base_url)).close()
|
||||
|
||||
with handler(timeout=0.1) as rh:
|
||||
with pytest.raises(TransportError):
|
||||
ws_validate_and_send(
|
||||
rh, Request(self.ws_base_url, proxies={'all': f'{supported_proto}://10.255.255.255'})).close()
|
||||
|
||||
|
||||
def create_fake_ws_connection(raised):
|
||||
import websockets.sync.client
|
||||
|
|
|
@ -3071,7 +3071,7 @@ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
|
|||
f = formats[-1]
|
||||
self.report_warning(
|
||||
'No subtitle format found matching "%s" for language %s, '
|
||||
'using %s' % (formats_query, lang, f['ext']))
|
||||
'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
|
@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
|
@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
|
@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
'whale': os.path.join(config, 'naver-whale'),
|
||||
}[browser_name]
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
|
@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
|||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
'whale': 'Whale',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
|
@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
|||
if value is None:
|
||||
return is_encrypted, None
|
||||
|
||||
# In chrome, session cookies have expires_utc set to 0
|
||||
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||
if not expires_utc:
|
||||
expires_utc = None
|
||||
|
||||
return is_encrypted, http.cookiejar.Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -6,10 +6,10 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
|
|
|
@ -12,20 +12,21 @@
|
|||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -10,18 +10,18 @@
|
|||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
|
|
|
@ -4,11 +4,11 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
join_nonempty,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -12,7 +12,6 @@
|
|||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
_FIELDS = '''
|
||||
_id
|
||||
clipImageSource
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
|
@ -39,7 +35,7 @@ class AluraIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
|
||||
course, video_id = self._match_valid_url(url)
|
||||
course, video_id = self._match_valid_url(url).group('course_name', 'id')
|
||||
video_url = self._VIDEO_URL % (course, video_id)
|
||||
|
||||
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
||||
|
@ -52,7 +48,7 @@ def _real_extract(self, url):
|
|||
|
||||
formats = []
|
||||
for video_obj in video_dict:
|
||||
video_url_m3u8 = video_obj.get('link')
|
||||
video_url_m3u8 = video_obj.get('mp4')
|
||||
video_format = self._extract_m3u8_formats(
|
||||
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
from ..utils import merge_dicts, url_or_none
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
|
|
|
@ -1,8 +1,5 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import re
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
|
@ -31,20 +32,6 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'info_dict': {
|
||||
'id': '100103-000-A',
|
||||
'title': 'USA: Dyskryminacja na porodówce',
|
||||
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
|
||||
'alt_title': 'ARTE Reportage',
|
||||
'upload_date': '20201103',
|
||||
'duration': 554,
|
||||
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
|
||||
'timestamp': 1604417980,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
|
@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
|
||||
'info_dict': {
|
||||
'id': '109067-000-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
|
||||
'timestamp': 1713927600,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
|
||||
'duration': 7599,
|
||||
'title': 'La loi de Téhéran',
|
||||
'upload_date': '20240424',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
'fr-forced': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'note': 'age-restricted',
|
||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||
|
@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
|
@ -143,16 +131,18 @@ def _fix_accessible_subs_locale(subs):
|
|||
updated_subs = {}
|
||||
for lang, sub_formats in subs.items():
|
||||
for fmt in sub_formats:
|
||||
if fmt.get('url', '').endswith('-MAL.m3u8'):
|
||||
lang += '-acc'
|
||||
updated_subs.setdefault(lang, []).append(fmt)
|
||||
url = fmt.get('url') or ''
|
||||
suffix = ('acc' if url.endswith('-MAL.m3u8')
|
||||
else 'forced' if '_VO' not in url
|
||||
else None)
|
||||
updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
|
||||
return updated_subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
language_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18'
|
||||
|
@ -180,10 +170,10 @@ def _real_extract(self, url):
|
|||
m = self._VERSION_CODE_RE.match(stream_version_code)
|
||||
if m:
|
||||
lang_pref = int(''.join('01'[x] for x in (
|
||||
m.group('vlang') == langauge_code, # we prefer voice in the requested language
|
||||
m.group('vlang') == language_code, # we prefer voice in the requested language
|
||||
not m.group('audio_desc'), # and not the audio description version
|
||||
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
||||
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
|
||||
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
|
||||
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
||||
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
||||
)))
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
jwt_encode_hs256,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlencode,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
|
|
|
@ -2,12 +2,12 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
InAdvancePagedList,
|
||||
format_field,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
|
|
@ -2,11 +2,11 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
try_get,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
|
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BUGGER',
|
||||
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
|
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'title': 'Germanwings crash site aerial video',
|
||||
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
|
@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'now SIMORGH_DATA with no video',
|
||||
}, {
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'id': '39275083',
|
||||
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
|
@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
# single video story with __PWA_PRELOADED_STATE__
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tasting the spice of life in Jaffna',
|
||||
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
|
||||
'timestamp': 1646058397,
|
||||
'upload_date': '20220228',
|
||||
'duration': 255,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
|
@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'redirects to TopGear home page',
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
# TODO: replacement test page
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
|
@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
'skip': 'Video no longer in page',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
# single video in __INITIAL_DATA__
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||
'timestamp': 1437750175,
|
||||
'upload_date': '20150724',
|
||||
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
# article with multiple videos embedded with Morph.setPayload
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
|
@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Testing noplaylist
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': 'p034ppnv',
|
||||
'ext': 'mp4',
|
||||
'title': 'All you need to know about Jurgen Klopp',
|
||||
'timestamp': 1444335081,
|
||||
'upload_date': '20151008',
|
||||
'duration': 122.0,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
|
@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
|
@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
'duration': 105,
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'id': 'p0b779gc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
'timestamp': 1638215626,
|
||||
'upload_date': '20211129',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
# video with script id __NEXT_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/uk-68546268',
|
||||
'info_dict': {
|
||||
'id': 'p0hj0lq7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1710188248,
|
||||
'upload_date': '20240311',
|
||||
'duration': 104,
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
|
@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
|
@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
|
@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p07c6sb9',
|
||||
'ext': 'mp4',
|
||||
'title': 'How positive thinking is harming your happiness',
|
||||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'title': 'The downsides of positive thinking',
|
||||
'description': 'The downsides of positive thinking',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||
'upload_date': '20220223',
|
||||
'timestamp': 1645632746,
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'id': 'p0hrw4nr',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'title': 'Are our coastlines being washed away?',
|
||||
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
|
||||
'timestamp': 1713556800,
|
||||
'upload_date': '20240419',
|
||||
'duration': 1588,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||
'uploader': 'World Service',
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
'chapters': [],
|
||||
}
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
|
@ -1008,8 +1039,7 @@ def _real_extract(self, url):
|
|||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
|
@ -1069,83 +1099,133 @@ def _real_extract(self, url):
|
|||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
# Several setPayload calls may be present but the video(s)
|
||||
# should be in one that mentions leadMedia or videoData
|
||||
morph_payload = self._search_json(
|
||||
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||
default={})
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
for lead_media in traverse_obj(morph_payload, (
|
||||
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||
**traverse_obj(lead_media, {
|
||||
'description': ('summary', {str}),
|
||||
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||
'uploader': ('masterBrand', {str}),
|
||||
'uploader_id': ('mid', {str}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
body = self._parse_json(traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||
if video_data.get('vpid'):
|
||||
video_id = video_data['vpid']
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
else:
|
||||
video_id = video_data['pid']
|
||||
entry = self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
'timestamp': traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||
),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
'title': (('title', 'caption'), {str}, any),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
})
|
||||
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||
return entry
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
playlist_title = traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
# various PRELOADED_STATE JSON
|
||||
preload_state = self._search_json(
|
||||
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||
# PRELOADED_STATE with current programme
|
||||
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||
if programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
**traverse_obj(current_programme, {
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||
'duration': ('duration', 'value', {int_or_none}),
|
||||
'uploader': ('network', 'short_title', {str}),
|
||||
'uploader_id': ('network', 'id', {str}),
|
||||
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||
'series': ('titles', 'primary', {str}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})
|
||||
),
|
||||
}
|
||||
|
||||
# PWA_PRELOADED_STATE with article video asset
|
||||
asset_id = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||
'assetVideo', 0, {str}, any))
|
||||
if asset_id:
|
||||
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||
if video_id:
|
||||
article = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||
|
||||
def image_url(image_id):
    # Look up the URL template stored for this image ID in the preload state
    # and substitute the '$recipe' placeholder with 'raw'.
    def to_raw(template):
        return url_or_none(template.replace('$recipe', 'raw'))

    path = ('entities', 'images', image_id, 'url', {to_raw})
    return traverse_obj(preload_state, path)
|
||||
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'id': video_id,
|
||||
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||
'title': ('title', {str}),
|
||||
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||
'thumbnail': (0, {image_url}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||
}
|
||||
else:
|
||||
return self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||
asset_id, playlist_title, display_id=playlist_id,
|
||||
description=playlist_description)
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
|
@ -1191,6 +1271,28 @@ def _real_extract(self, url):
|
|||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_model(model):
    """Extract single video from model structure

    Returns None when the first version of the model has no string
    'versionId'; otherwise returns an info dict for that version.
    """
    version_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
    if version_id:
        # Media selector is queried before the metadata is collected
        formats, subtitles = self._download_media_selector(version_id)

        def raw_thumbnail(u):
            # Resolve against the page URL after filling in the '$recipe' placeholder
            return urljoin(url, u.replace('$recipe', 'raw'))

        def non_empty(x):
            # Empty synopsis strings are treated as missing
            return x or None

        from_milliseconds = functools.partial(int_or_none, scale=1000)
        metadata = traverse_obj(model, {
            'title': ('title', {str}),
            'thumbnail': ('imageUrl', {raw_thumbnail}),
            'description': ('synopses', ('long', 'medium', 'short'), {str}, {non_empty}, any),
            'duration': ('versions', 0, 'duration', {int}),
            'timestamp': ('versions', 0, 'availableFrom', {from_milliseconds}),
        })
        return {
            'id': version_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
    return None
|
||||
|
||||
def is_type(*types):
    # Build a two-argument predicate (key, value) that is true when the
    # value's 'type' entry is one of the given types.
    def matches(_, v):
        return v['type'] in types

    return matches
|
||||
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
|
@ -1202,6 +1304,19 @@ def _real_extract(self, url):
|
|||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
for video_data in traverse_obj(initial_data, (
|
||||
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
|
@ -1234,27 +1349,90 @@ def parse_media(media):
|
|||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
|
||||
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||
name = resp['name']
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
for block in traverse_obj(resp, (
|
||||
'data', (None, ('content', 'model')), 'blocks',
|
||||
is_type('media', 'video'), 'model', {dict})):
|
||||
parse_media(block)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# extract from SIMORGH_DATA hydration JSON
|
||||
simorgh_data = self._search_json(
|
||||
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||
'simorgh data', playlist_id, default={})
|
||||
if simorgh_data:
|
||||
done = False
|
||||
for video_data in traverse_obj(simorgh_data, (
|
||||
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if video_data['type'] == 'video':
|
||||
entry = parse_model(model)
|
||||
else: # legacyMedia: no duration, subtitles
|
||||
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||
media_data = traverse_obj(simorgh_data, (
|
||||
'pageData', 'promo', 'media',
|
||||
{lambda x: x if x['id'] == block_id else None}))
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
'id': block_id,
|
||||
'display_id': playlist_id,
|
||||
'formats': formats,
|
||||
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if done:
|
||||
break
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
    # Parse every regex match in the webpage as JSON (non-fatally) and keep
    # only the results that are truthy.
    parsed = []
    for raw in re.findall(pattern, webpage):
        data = self._parse_json(raw, playlist_id, fatal=False)
        if data:
            parsed.append(data)
    return parsed
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||
('props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('video'),
|
||||
'model', 'blocks', is_type('media'),
|
||||
'model', 'blocks', is_type('mediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if model and (entry := parse_model(model)):
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
|
|
|
@ -1045,7 +1045,8 @@ def fetch_page(page_idx):
|
|||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||
headers={'referer': url})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
|
@ -5,7 +6,6 @@
|
|||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloggerIE(InfoExtractor):
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
)
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
)
|
||||
from ..utils import js_to_json, traverse_obj, unified_timestamp
|
||||
|
||||
|
||||
class BoxCastVideoIE(InfoExtractor):
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
classproperty,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
urljoin
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -12,10 +12,11 @@
|
|||
)
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
find_xpath_attr,
|
||||
fix_xml_ampersands,
|
||||
float_or_none,
|
||||
|
@ -29,7 +30,6 @@
|
|||
try_get,
|
||||
unescapeHTML,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
|
|
@ -27,8 +27,17 @@ def _get_logged_in_username(self, url, video_id):
|
|||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = self._hidden_inputs(self._download_webpage(
|
||||
self._LOGIN_API, None, 'Downloading login page'))
|
||||
login_page, urlh = self._download_webpage_handle(
|
||||
self._LOGIN_API, None, 'Downloading login page', expected_status=401)
|
||||
if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API):
|
||||
self.write_debug('Cookies are valid, no login required.')
|
||||
return
|
||||
|
||||
if urlh.status == 401:
|
||||
self.write_debug('Got HTTP Error 401; cookies have been invalidated')
|
||||
login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CableAVIE(InfoExtractor):
    # Extractor for cableav.tv pages; all metadata comes from OpenGraph tags.
    _VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://cableav.tv/lS4iR9lWjN8/',
        'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
        'info_dict': {
            'id': 'lS4iR9lWjN8',
            'ext': 'mp4',
            'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
            'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The og:video URL is passed to the m3u8 extractor; formats are
        # fetched before the remaining OpenGraph fields are read.
        manifest_url = self._og_search_video_url(page, secure=False)
        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')

        info = {'id': video_id}
        info['title'] = self._og_search_title(page)
        info['description'] = self._og_search_description(page)
        info['thumbnail'] = self._og_search_thumbnail(page)
        info['formats'] = formats
        return info
|
74
yt_dlp/extractor/caffeinetv.py
Normal file
74
yt_dlp/extractor/caffeinetv.py
Normal file
|
@ -0,0 +1,74 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaffeineTVIE(InfoExtractor):
    # Extractor for caffeine.tv video pages, backed by the public activity API.
    _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)'
    _TESTS = [{
        'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e',
        'info_dict': {
            'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e',
            'ext': 'mp4',
            'title': 'GOOOOD MORNINNNNN #highlights',
            'timestamp': 1654702180,
            'upload_date': '20220608',
            'uploader': 'RahJON Wicc',
            'uploader_id': 'TsuSurf',
            'duration': 3145,
            'age_limit': 17,
            'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': ['highlights', 'battlerap'],
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        activity = self._download_json(
            f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id)
        broadcast = traverse_obj(activity, ('broadcast_info', {dict})) or {}

        # 'video_url' is mandatory: a missing key raises KeyError right here
        media_url = broadcast['video_url']
        if determine_ext(media_url) == 'm3u8':
            formats = self._extract_m3u8_formats(media_url, video_id, 'mp4')
        else:
            formats = [{'url': media_url}]

        info = {
            'id': video_id,
            'formats': formats,
        }
        # Fields taken from the top-level activity object
        info.update(traverse_obj(activity, {
            'like_count': ('like_count', {int_or_none}),
            'view_count': ('view_count', {int_or_none}),
            'comment_count': ('comment_count', {int_or_none}),
            'tags': ('tags', ..., {str}, {lambda x: x or None}),
            'uploader': ('user', 'name', {str}),
            'uploader_id': (((None, 'user'), 'username'), {str}, any),
            'is_live': ('is_live', {bool}),
        }))
        # Fields taken from the nested broadcast info
        info.update(traverse_obj(broadcast, {
            'title': ('broadcast_title', {str}),
            'duration': ('content_duration', {int_or_none}),
            'timestamp': ('broadcast_start_time', {parse_iso8601}),
            'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}),
        }))

        # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system
        age_limits = {
            'FOUR_PLUS': 0,
            'NINE_PLUS': 9,
            'TWELVE_PLUS': 12,
            'SEVENTEEN_PLUS': 17,
        }
        # Unknown or absent ratings fall back to 17
        info['age_limit'] = age_limits.get(broadcast.get('content_rating'), 17)
        return info
|
|
@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
|
|||
'id': '24484',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
|
||||
'description': 'md5:3de3f151180684621e85be7c10e4e613',
|
||||
'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
|
||||
'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
|
||||
'upload_date': '20211026',
|
||||
'duration': 360,
|
||||
|
@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
|
|||
'duration': 360,
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}, {
|
||||
'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
|
||||
'info_dict': {
|
||||
'id': '33500',
|
||||
'ext': 'mp4',
|
||||
'title': 'Encore des mesures d\'économie dans le Jura',
|
||||
'description': 'md5:938b5b556592f2d1b9ab150268082a80',
|
||||
'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
|
||||
'upload_date': '20240411',
|
||||
'duration': 105,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._parse_json(self._search_regex(
|
||||
r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
|
||||
webpage, 'data_json'), id)['1']['data']['data']
|
||||
webpage, 'data_json'), video_id)['1']['data']['data']
|
||||
manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
|
||||
subtitles = {}
|
||||
formats = [{
|
||||
|
@ -75,15 +86,17 @@ def _real_extract(self, url):
|
|||
'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
|
||||
} for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
|
||||
if manifests.get('hls'):
|
||||
m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
|
||||
formats.extend(m3u8_frmts)
|
||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
manifests['hls'], video_id, m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
if manifests.get('dash'):
|
||||
dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
|
||||
formats.extend(dash_frmts)
|
||||
subtitles = self._merge_subtitles(subtitles, dash_subs)
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
manifests['dash'], video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
return {
|
||||
'id': id,
|
||||
'id': video_id,
|
||||
'title': data_json.get('title').strip(),
|
||||
'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
|
||||
'thumbnail': data_json.get('poster'),
|
||||
|
|
|
@ -5,14 +5,14 @@
|
|||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
merge_dicts,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
random_birthday,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
|
@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
|
|||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'crash404',
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
'duration': 137,
|
||||
'age_limit': 0,
|
||||
'upload_date': '20160220',
|
||||
'timestamp': 1455968218,
|
||||
}
|
||||
}, {
|
||||
# Age-restricted
|
||||
'url': 'http://www.cda.pl/video/1273454c4',
|
||||
# Age-restricted with vfilm redirection
|
||||
'url': 'https://www.cda.pl/video/8753244c4',
|
||||
'md5': 'd8eeb83d63611289507010d3df3bb8b3',
|
||||
'info_dict': {
|
||||
'id': '1273454c4',
|
||||
'id': '8753244c4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bronson (2008) napisy HD 1080p',
|
||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
||||
'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
|
||||
'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
|
||||
'height': 1080,
|
||||
'uploader': 'boniek61',
|
||||
'uploader': 'arhn eu',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 5554,
|
||||
'duration': 991,
|
||||
'age_limit': 18,
|
||||
'view_count': int,
|
||||
'average_rating': float,
|
||||
},
|
||||
'timestamp': 1633888264,
|
||||
'upload_date': '20211010',
|
||||
}
|
||||
}, {
|
||||
# Age-restricted without vfilm redirection
|
||||
'url': 'https://www.cda.pl/video/17028157b8',
|
||||
'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
|
||||
'info_dict': {
|
||||
'id': '17028157b8',
|
||||
'ext': 'mp4',
|
||||
'title': 'STENDUPY MICHAŁ OGIŃSKI',
|
||||
'description': 'md5:5851f3272bfc31f762d616040a1d609a',
|
||||
'height': 480,
|
||||
'uploader': 'oginski',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 18855,
|
||||
'age_limit': 18,
|
||||
'average_rating': float,
|
||||
'timestamp': 1699705901,
|
||||
'upload_date': '20231111',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
||||
form_data = random_birthday('rok', 'miesiac', 'dzien')
|
||||
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
|
||||
data, content_type = multipart_encode(form_data)
|
||||
data, content_type = multipart_encode({'age_confirm': ''})
|
||||
return self._download_webpage(
|
||||
urljoin(url, '/a/validatebirth'), video_id, *args,
|
||||
url, video_id, *args,
|
||||
data=data, headers={
|
||||
'Referer': url,
|
||||
'Content-Type': content_type,
|
||||
|
@ -164,7 +182,7 @@ def _real_extract(self, url):
|
|||
if 'Authorization' in self._API_HEADERS:
|
||||
return self._api_extract(video_id)
|
||||
else:
|
||||
return self._web_extract(video_id, url)
|
||||
return self._web_extract(video_id)
|
||||
|
||||
def _api_extract(self, video_id):
|
||||
meta = self._download_json(
|
||||
|
@ -197,9 +215,9 @@ def _api_extract(self, video_id):
|
|||
'view_count': meta.get('views'),
|
||||
}
|
||||
|
||||
def _web_extract(self, video_id, url):
|
||||
def _web_extract(self, video_id):
|
||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||
webpage = self._download_webpage(
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
||||
|
||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||
|
@ -209,10 +227,10 @@ def _web_extract(self, video_id, url):
|
|||
self.raise_geo_restricted()
|
||||
|
||||
need_confirm_age = False
|
||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
||||
if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
|
||||
webpage, 'birthday validate form', default=None):
|
||||
webpage = self._download_age_confirm_page(
|
||||
url, video_id, note='Confirming age')
|
||||
urlh.url, video_id, note='Confirming age')
|
||||
need_confirm_age = True
|
||||
|
||||
formats = []
|
||||
|
@ -222,9 +240,6 @@ def _web_extract(self, video_id, url):
|
|||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||
''', webpage, 'uploader', default=None, group='uploader')
|
||||
view_count = self._search_regex(
|
||||
r'Odsłony:(?:\s| )*([0-9]+)', webpage,
|
||||
'view_count', default=None)
|
||||
average_rating = self._search_regex(
|
||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||
|
@ -235,7 +250,6 @@ def _web_extract(self, video_id, url):
|
|||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'uploader': uploader,
|
||||
'view_count': int_or_none(view_count),
|
||||
'average_rating': float_or_none(average_rating),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'formats': formats,
|
||||
|
|
|
@ -101,7 +101,7 @@ def _real_extract(self, url):
|
|||
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||
playlist_title = self._og_search_title(webpage, default=None)
|
||||
if site_name and playlist_title:
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0]
|
||||
playlist_description = self._og_search_description(webpage, default=None)
|
||||
if playlist_description:
|
||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
)
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class ClippitIE(InfoExtractor):
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import base64
|
||||
import collections
|
||||
import functools
|
||||
import getpass
|
||||
import hashlib
|
||||
import http.client
|
||||
|
@ -21,7 +22,6 @@
|
|||
import urllib.request
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from ..compat import functools # isort: split
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
|
@ -957,7 +957,8 @@ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=
|
|||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
|
||||
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
|
||||
encoding=encoding, data=data)
|
||||
return (content, urlh)
|
||||
|
||||
@staticmethod
|
||||
|
@ -1005,8 +1006,10 @@ def __check_blocked(self, content):
|
|||
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
|
||||
expected=True)
|
||||
|
||||
def _request_dump_filename(self, url, video_id):
|
||||
basen = f'{video_id}_{url}'
|
||||
def _request_dump_filename(self, url, video_id, data=None):
|
||||
if data is not None:
|
||||
data = hashlib.md5(data).hexdigest()
|
||||
basen = join_nonempty(video_id, data, url, delim='_')
|
||||
trim_length = self.get_param('trim_file_name') or 240
|
||||
if len(basen) > trim_length:
|
||||
h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
|
||||
|
@ -1028,7 +1031,8 @@ def __decode_webpage(self, webpage_bytes, encoding, headers):
|
|||
except LookupError:
|
||||
return webpage_bytes.decode('utf-8', 'replace')
|
||||
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
|
||||
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
prefix=None, encoding=None, data=None):
|
||||
webpage_bytes = urlh.read()
|
||||
if prefix is not None:
|
||||
webpage_bytes = prefix + webpage_bytes
|
||||
|
@ -1037,7 +1041,9 @@ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errno
|
|||
dump = base64.b64encode(webpage_bytes).decode('ascii')
|
||||
self._downloader.to_screen(dump)
|
||||
if self.get_param('write_pages'):
|
||||
filename = self._request_dump_filename(urlh.url, video_id)
|
||||
if isinstance(url_or_request, Request):
|
||||
data = self._create_request(url_or_request, data).data
|
||||
filename = self._request_dump_filename(urlh.url, video_id, data)
|
||||
self.to_screen(f'Saving request to {filename}')
|
||||
with open(filename, 'wb') as outf:
|
||||
outf.write(webpage_bytes)
|
||||
|
@ -1098,7 +1104,7 @@ def download_content(self, url_or_request, video_id, note=note, errnote=errnote,
|
|||
impersonate=None, require_impersonation=False):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
|
||||
self.to_screen(f'Loading request from {filename}')
|
||||
try:
|
||||
with open(filename, 'rb') as dumpf:
|
||||
|
@ -2445,7 +2451,7 @@ def _parse_smil_formats_and_subtitles(
|
|||
})
|
||||
continue
|
||||
|
||||
src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
|
||||
src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src)
|
||||
src_url = src_url.strip()
|
||||
|
||||
if proto == 'm3u8' or src_ext == 'm3u8':
|
||||
|
@ -3378,23 +3384,16 @@ def manifest_url(manifest):
|
|||
return formats
|
||||
|
||||
def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
|
||||
mobj = re.search(
|
||||
r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''',
|
||||
webpage)
|
||||
if mobj:
|
||||
try:
|
||||
jwplayer_data = self._parse_json(mobj.group('options'),
|
||||
video_id=video_id,
|
||||
transform_source=transform_source)
|
||||
except ExtractorError:
|
||||
pass
|
||||
else:
|
||||
if isinstance(jwplayer_data, dict):
|
||||
return jwplayer_data
|
||||
return self._search_json(
|
||||
r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''',
|
||||
webpage, 'JWPlayer data', video_id,
|
||||
# must be a {...} or sequence, ending
|
||||
contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))',
|
||||
transform_source=transform_source, default=None)
|
||||
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
|
||||
def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs):
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
webpage, video_id, transform_source=transform_source)
|
||||
return self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, *args, **kwargs)
|
||||
|
||||
|
@ -3426,22 +3425,14 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
|
|||
mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
|
||||
|
||||
subtitles = {}
|
||||
tracks = video_data.get('tracks')
|
||||
if tracks and isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
track_kind = track.get('kind')
|
||||
if not track_kind or not isinstance(track_kind, str):
|
||||
continue
|
||||
if track_kind.lower() not in ('captions', 'subtitles'):
|
||||
continue
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
for track in traverse_obj(video_data, (
|
||||
'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))):
|
||||
track_url = urljoin(base_url, track.get('file'))
|
||||
if not track_url:
|
||||
continue
|
||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||
'url': self._proto_relative_url(track_url)
|
||||
})
|
||||
|
||||
entry = {
|
||||
'id': this_video_id,
|
||||
|
@ -3526,7 +3517,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
|
|||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||
# of jwplayer.flash.swf
|
||||
rtmp_url_parts = re.split(
|
||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||
r'((?:mp4|mp3|flv):)', source_url, maxsplit=1)
|
||||
if len(rtmp_url_parts) == 3:
|
||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||
a_format.update({
|
||||
|
|
|
@ -40,3 +40,19 @@ def _real_extract(self, url):
|
|||
'Your URL starts with a Byte Order Mark (BOM). '
|
||||
'Removing the BOM and looking for "%s" ...' % real_url)
|
||||
return self.url_result(real_url)
|
||||
|
||||
|
||||
class BlobIE(InfoExtractor):
    # Matches any 'blob:' URL and rejects it with an explanatory, expected error.
    IE_DESC = False
    _VALID_URL = r'blob:'

    _TESTS = [{
        'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # expected=True marks this as a user-facing condition rather than an extractor bug
        message = (
            'You\'ve asked yt-dlp to download a blob URL. '
            'A blob URL exists only locally in your browser. '
            'It is not possible for yt-dlp to access it.')
        raise ExtractorError(message, expected=True)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from .theplatform import ThePlatformFeedIE
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
|
@ -13,7 +14,6 @@
|
|||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
|
@ -24,6 +25,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27'
|
||||
_REFRESH_TOKEN = None
|
||||
_AUTH_HEADERS = None
|
||||
_AUTH_EXPIRY = None
|
||||
|
@ -179,10 +181,19 @@ def _extract_stream(self, identifier, display_id=None):
|
|||
display_id = identifier
|
||||
|
||||
self._update_auth()
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT}
|
||||
try:
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers)
|
||||
except ExtractorError as error:
|
||||
if self.get_param('ignore_no_formats_error'):
|
||||
self.report_warning(error.orig_msg)
|
||||
return [], {}
|
||||
elif isinstance(error.cause, HTTPError) and error.cause.status == 420:
|
||||
raise ExtractorError(
|
||||
'You have reached the rate-limit for active streams; try again later', expected=True)
|
||||
raise
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
|
@ -211,7 +222,7 @@ def _extract_stream(self, identifier, display_id=None):
|
|||
fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
|
||||
self._merge_subtitles(dash_subs, target=subtitles)
|
||||
else:
|
||||
continue # XXX: Update this if/when meta mpd formats are working
|
||||
continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = audio_locale
|
||||
|
@ -221,6 +232,15 @@ def _extract_stream(self, identifier, display_id=None):
|
|||
for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
|
||||
subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
|
||||
|
||||
# Invalidate stream token to avoid rate-limit
|
||||
error_msg = 'Unable to invalidate stream token; you may experience rate-limiting'
|
||||
if stream_token := stream_response.get('token'):
|
||||
self._request_webpage(Request(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive',
|
||||
headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False)
|
||||
else:
|
||||
self.report_warning(error_msg)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .senategov import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
from ..compat import compat_HTMLParseError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
find_xpath_attr,
|
||||
get_element_by_attribute,
|
||||
|
@ -19,8 +21,6 @@
|
|||
str_to_int,
|
||||
unescapeHTML,
|
||||
)
|
||||
from .senategov import SenateISVPIE
|
||||
from .ustream import UstreamIE
|
||||
|
||||
|
||||
class CSpanIE(InfoExtractor):
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import unified_timestamp
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unified_timestamp
|
||||
|
||||
|
||||
class CtsNewsIE(InfoExtractor):
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
determine_protocol,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
|
||||
from ..compat import compat_str
|
||||
from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
|
||||
|
||||
|
||||
class DamtomoBaseIE(InfoExtractor):
|
||||
|
|
197
yt_dlp/extractor/dangalplay.py
Normal file
197
yt_dlp/extractor/dangalplay.py
Normal file
|
@ -0,0 +1,197 @@
|
|||
import hashlib
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
# Shared base for the DangalPlay extractors: holds the API constants, the
# token-style "login" and the helpers used by both the episode and the season
# extractor.
class DangalPlayBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'dangalplay'
    # User id supplied by the user through the password field; None until login
    _OTV_USER_ID = None
    _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
    _API_BASE = 'https://ottapi.dangalplay.com'
    _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM'  # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
    _SECRET_KEY = 'f53d31a4377e4ef31fa0'  # same as above

    def _perform_login(self, username, password):
        """Store the user id passed as the password.

        The username must be the literal string 'token' and the password must be
        exactly 32 lowercase hexadecimal characters; anything else raises an
        expected ExtractorError carrying the login hint.
        """
        if self._OTV_USER_ID:
            return
        # `password or ''` makes a missing password fail the format check (and
        # thus show the login hint) instead of raising TypeError in re.fullmatch
        if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password or ''):
            raise ExtractorError(self._LOGIN_HINT, expected=True)
        self._OTV_USER_ID = password

    def _real_initialize(self):
        """Refuse to run without a user id; every playback request is signed with it."""
        if not self._OTV_USER_ID:
            self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None)

    def _extract_episode_info(self, metadata, episode_slug, series_slug):
        """Build the episode-level info dict fields.

        Episode and season numbers are parsed from the slugs (the season number
        falls back to 1 when the series slug contains no `season-N` part); the
        remaining fields are read from the API `metadata` mapping.
        """
        return {
            'display_id': episode_slug,
            'episode_number': int_or_none(self._search_regex(
                r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)),
            'season_number': int_or_none(self._search_regex(
                r'season-(\d+)', series_slug, 'season number', default='1')),
            # Slug is used as the series name unless the metadata below overrides it
            'series': series_slug,
            **traverse_obj(metadata, {
                'id': ('content_id', {str}),
                'title': ('display_title', {str}),
                'episode': ('title', {str}),
                # An empty show_name is mapped to None; presumably traverse_obj then
                # omits the key so that the slug fallback above is kept
                'series': ('show_name', {str}, {lambda x: x or None}),
                'series_id': ('catalog_id', {str}),
                'duration': ('duration', {int_or_none}),
                'release_timestamp': ('release_date_uts', {int_or_none}),
            }),
        }

    def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query=None):
        """GET `{_API_BASE}/{path}` as JSON.

        `auth_token` and `region` are always sent; entries in `query` are added on
        top of them and win on key conflicts. `query` may be omitted or None.
        """
        return self._download_json(
            f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
            headers={'Accept': 'application/json'}, query={
                'auth_token': self._AUTH_TOKEN,
                'region': 'IN',
                **(query or {}),
            })
|
||||
|
||||
|
||||
# Extractor for a single DangalPlay episode page:
# /shows/<series-slug>/<episode-slug>
class DangalPlayIE(DangalPlayBaseIE):
    IE_NAME = 'dangalplay'
    # The dot in the host name is escaped so that it only matches a literal '.';
    # the negative lookahead keeps `/shows/<series>/episodes` out of this extractor
    _VALID_URL = r'https?://(?:www\.)?dangalplay\.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01',
        'info_dict': {
            'id': '647c61dc1e7171310dcd49b4',
            'ext': 'mp4',
            'release_timestamp': 1262304000,
            'episode_number': 1,
            'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
            'series': 'kitani-mohabbat-hai-season-2',
            'season_number': 2,
            'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2',
            'release_date': '20100101',
            'duration': 2325,
            'season': 'Season 2',
            'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01',
            'series_id': '645c9ea41e717158ca574966',
        },
    }, {
        'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01',
        'info_dict': {
            'id': '65d31d9ba73b9c3abd14a7f3',
            'ext': 'mp4',
            'episode': 'EP 1 | MILKE BHI HUM NA MILE',
            'release_timestamp': 1708367411,
            'episode_number': 1,
            'season': 'Season 1',
            'title': 'EP 1 | MILKE BHI HUM NA MILE',
            'duration': 156048,
            'release_date': '20240219',
            'season_number': 1,
            'series': 'MILKE BHI HUM NA MILE',
            'series_id': '645c9ea41e717158ca574966',
            'display_id': 'milke-bhi-hum-na-mile-ep-number-01',
        },
    }]

    def _generate_api_data(self, data):
        """Build the signed JSON request body for the playback-details endpoint.

        The `md5` field is the MD5 hex digest of the concatenation of catalog id,
        content id, user id, current Unix timestamp and the secret key, in that
        order. Returns the compact JSON document encoded as bytes.
        """
        catalog_id = data['catalog_id']
        content_id = data['content_id']
        timestamp = str(int(time.time()))
        unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY))

        return json.dumps({
            'catalog_id': catalog_id,
            'content_id': content_id,
            'category': '',
            'region': 'IN',
            'auth_token': self._AUTH_TOKEN,
            'id': self._OTV_USER_ID,
            'md5': hashlib.md5(unhashed.encode()).hexdigest(),
            'ts': timestamp,
        }, separators=(',', ':')).encode()

    def _real_extract(self, url):
        """Fetch episode metadata, request playback details and extract the HLS formats."""
        series_slug, episode_slug = self._match_valid_url(url).group('series', 'id')
        metadata = self._call_api(
            f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip',
            episode_slug, query={'item_language': ''})['data']

        try:
            details = self._download_json(
                f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug,
                'Downloading playback details JSON', headers={
                    'Accept': 'application/json',
                    'Content-Type': 'application/json',
                }, data=self._generate_api_data(metadata))['data']
        except ExtractorError as e:
            # On HTTP 422 the response body is parsed as JSON and its 'error'
            # object is used to produce a more helpful message
            if isinstance(e.cause, HTTPError) and e.cause.status == 422:
                error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
                if error_info.get('code') == '1016':
                    self.raise_login_required(
                        f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
                elif msg := error_info.get('message'):
                    raise ExtractorError(msg)
            # Anything else is re-raised unchanged
            raise

        # First valid URL among `adaptive_url` and the nested hd/hls playback URLs
        m3u8_url = traverse_obj(details, (
            ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any))
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4')

        return {
            'formats': formats,
            'subtitles': subtitles,
            **self._extract_episode_info(metadata, episode_slug, series_slug),
        }
|
||||
|
||||
|
||||
# Playlist extractor for a DangalPlay season page (/shows/<slug>) or for one
# of its episode sub-ranges (/shows/<slug>/<ep-...>/episodes)
class DangalPlaySeasonIE(DangalPlayBaseIE):
    IE_NAME = 'dangalplay:season'
    # The dot in the host name is escaped so that it only matches a literal '.'
    _VALID_URL = r'https?://(?:www\.)?dangalplay\.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1',
        'playlist_mincount': 170,
        'info_dict': {
            'id': 'kitani-mohabbat-hai-season-1',
        },
    }, {
        'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes',
        'playlist_count': 30,
        'info_dict': {
            'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1',
        },
    }, {
        # 1 season only, series page is season page
        'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile',
        'playlist_mincount': 15,
        'info_dict': {
            'id': 'milke-bhi-hum-na-mile',
        },
    }]

    def _entries(self, subcategories, series_slug):
        """Yield one url_result per episode of every given subcategory.

        The per-subcategory request is non-fatal, so a failed download simply
        contributes no entries. Items without a truthy `friendly_id` are skipped.
        """
        for subcategory in subcategories:
            data = self._call_api(
                f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip',
                series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={
                    'order_by': 'asc',
                    'status': 'published',
                })
            for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])):
                episode_slug = ep['friendly_id']
                yield self.url_result(
                    f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}',
                    DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug))

    def _real_extract(self, url):
        """Return a playlist of the season's episodes.

        When the URL names a sub-range only that subcategory is listed; otherwise
        the subcategory ids are looked up from the season info endpoint.
        """
        series_slug, subcategory = self._match_valid_url(url).group('id', 'sub')
        subcategories = [subcategory] if subcategory else traverse_obj(
            self._call_api(
                f'catalogs/shows/items/{series_slug}.gzip', series_slug,
                'Downloading season info JSON', query={'item_language': ''}),
            ('data', 'subcategories', ..., 'friendly_id', {str}))

        return self.playlist_result(
            self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory))
|
|
@ -1,11 +1,11 @@
|
|||
import re
|
||||
import os.path
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
url_basename,
|
||||
remove_start,
|
||||
url_basename,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_resolution,
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
|
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
unified_strdate,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import time
|
||||
import hashlib
|
||||
import time
|
||||
import urllib
|
||||
import uuid
|
||||
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
remove_start,
|
||||
|
@ -355,12 +355,10 @@ def _download_video_playback_info(self, disco_base, video_id, headers):
|
|||
video_id, headers=headers, data=json.dumps({
|
||||
'deviceInfo': {
|
||||
'adBlocker': False,
|
||||
'drmSupported': False,
|
||||
},
|
||||
'videoId': video_id,
|
||||
'wisteriaProperties': {
|
||||
'platform': 'desktop',
|
||||
'product': self._PRODUCT,
|
||||
},
|
||||
'wisteriaProperties': {},
|
||||
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -878,10 +876,31 @@ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
|||
})
|
||||
|
||||
|
||||
class DiscoveryNetworksDeIE(DPlayBaseIE):
|
||||
class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold',
|
||||
'info_dict': {
|
||||
'id': '4756322',
|
||||
'ext': 'mp4',
|
||||
'title': 'German Gold',
|
||||
'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6',
|
||||
'display_id': 'goldrausch-in-australien/german-gold',
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 5',
|
||||
'season_number': 5,
|
||||
'series': 'Goldrausch in Australien',
|
||||
'duration': 2648.0,
|
||||
'upload_date': '20230517',
|
||||
'timestamp': 1684357500,
|
||||
'creators': ['DMAX'],
|
||||
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg',
|
||||
'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||
'info_dict': {
|
||||
'id': '78867',
|
||||
|
@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE):
|
|||
'season_number': 1,
|
||||
'thumbnail': r're:https://.+\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
|
||||
'only_matching': True,
|
||||
|
@ -920,8 +937,14 @@ def _real_extract(self, url):
|
|||
country = 'GB' if domain == 'dplay.co.uk' else 'DE'
|
||||
realm = 'questuk' if country == 'GB' else domain.replace('.', '')
|
||||
return self._get_disco_api_info(
|
||||
url, '%s/%s' % (programme, alternate_id),
|
||||
'sonic-eu1-prod.disco-api.com', realm, country)
|
||||
url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country)
|
||||
|
||||
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||
headers.update({
|
||||
'x-disco-params': f'realm={realm}',
|
||||
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
|
||||
'Authorization': self._get_auth(disco_base, display_id, realm),
|
||||
})
|
||||
|
||||
|
||||
class DiscoveryPlusShowBaseIE(DPlayBaseIE):
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
NO_DEFAULT,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
)
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
|
|
|
@ -2,15 +2,15 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..compat import compat_urlparse
|
||||
|
||||
|
||||
class DWIE(InfoExtractor):
|
||||
|
|
|
@ -1,105 +0,0 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
get_elements_by_class,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
# Extractor for einthusan.(tv|com|ca) movie pages
class EinthusanIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://einthusan.tv/movie/watch/9097/',
        'md5': 'ff0f7f2065031b8a2cf13a933731c035',
        'info_dict': {
            'id': '9097',
            'ext': 'mp4',
            'title': 'Ae Dil Hai Mushkil',
            'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
        'only_matching': True,
    }, {
        'url': 'https://einthusan.com/movie/watch/9097/',
        'only_matching': True,
    }, {
        'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
        'only_matching': True,
    }]

    # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
    def _decrypt(self, encrypted_data, video_id):
        """Decode the obfuscated link payload into a JSON object.

        The base64 text is rebuilt from the first ten characters, the last
        character, and characters 12 up to (not including) the last one, then
        base64-decoded and parsed as UTF-8 JSON.
        """
        return self._parse_json(compat_b64decode((
            encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
        )).decode('utf-8'), video_id)

    def _real_extract(self, url):
        """Scrape the watch page, request the encrypted links and build the formats."""
        mobj = self._match_valid_url(url)
        host = mobj.group('host')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')

        player_params = extract_attributes(self._search_regex(
            r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))

        # The page id is sent back as the CSRF token of the AJAX request below
        page_id = self._html_search_regex(
            '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
        video_data = self._download_json(
            f'https://{host}/ajax/movie/watch/{video_id}/', video_id,
            data=urlencode_postdata({
                'xEvent': 'UIVideoPlayer.PingOutcome',
                'xJson': json.dumps({
                    'EJOutcomes': player_params['data-ejpingables'],
                    'NativeHLS': False
                }),
                'arcVersion': 3,
                'appVersion': 59,
                'gorilla.csrf.Token': page_id,
            }))['Data']

        # A string payload starting with '/ratelimited/' is returned instead of
        # the link object when the site throttles the client
        if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
            raise ExtractorError(
                'Download rate reached. Please try again later.', expected=True)

        ej_links = self._decrypt(video_data['EJLinks'], video_id)

        formats = []

        m3u8_url = ej_links.get('HLSLink')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))

        mp4_url = ej_links.get('MP4Link')
        if mp4_url:
            formats.append({
                'url': mp4_url,
            })

        # The description is optional metadata: fall back to None rather than
        # failing the whole extraction when the page has no 'synopsis' element
        description = next(iter(get_elements_by_class('synopsis', webpage) or []), None)
        thumbnail = self._html_search_regex(
            r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
            webpage, 'thumbnail url', fatal=False, group='url')
        if thumbnail is not None:
            thumbnail = compat_urlparse.urljoin(url, thumbnail)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
        }
|
|
@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
|
|||
_VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
|
||||
r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
|
||||
'info_dict': {
|
||||
'id': '354502-0001-002',
|
||||
'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
|
||||
'id': '335699-0001-006',
|
||||
'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20211231',
|
||||
'release_timestamp': 1640952000,
|
||||
'release_date': '20201221',
|
||||
'release_timestamp': 1608544800,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'This event may not be accessible',
|
||||
'No video formats found',
|
||||
'Requested format is not available',
|
||||
],
|
||||
}, {
|
||||
'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
|
||||
'info_dict': {
|
||||
'id': '348021-0054-001',
|
||||
'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20220115',
|
||||
'release_timestamp': 1642233600,
|
||||
'description': str,
|
||||
},
|
||||
'params': {
|
||||
|
@ -124,6 +142,10 @@ def _real_extract(self, url):
|
|||
if data_json.get('drm_mode') == 'ON':
|
||||
self.report_drm(video_id)
|
||||
|
||||
if data_json.get('is_pass_ticket') == 'YES':
|
||||
raise ExtractorError(
|
||||
'This URL is for a pass ticket instead of a player page', expected=True)
|
||||
|
||||
delivery_status = data_json.get('delivery_status')
|
||||
archive_mode = data_json.get('archive_mode')
|
||||
release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_qs,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
qualities,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
xpath_text
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
@ -94,13 +94,14 @@ def get_item(type_, preference):
|
|||
|
||||
class EuroParlWebstreamIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
|
||||
(?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
|
||||
https?://multimedia\.europarl\.europa\.eu/
|
||||
(?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
|
||||
'info_dict': {
|
||||
'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
|
||||
'display_id': '20220914-0900-PLENARY',
|
||||
'ext': 'mp4',
|
||||
'title': 'Plenary session',
|
||||
'release_timestamp': 1663139069,
|
||||
|
@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
|
|||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
|
||||
'info_dict': {
|
||||
'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
|
||||
'display_id': '20230301-1130-COMMITTEE-CULT',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20230301',
|
||||
'title': 'Committee on Culture and Education',
|
||||
|
@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
|
|||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'Not live anymore'
|
||||
}, {
|
||||
'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
|
||||
'info_dict': {
|
||||
'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
|
||||
'display_id': '20240320-1345-SPECIAL-PRESSER',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20240320',
|
||||
'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
|
||||
'release_timestamp': 1710939767,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -166,6 +181,7 @@ def _real_extract(self, url):
|
|||
|
||||
return {
|
||||
'id': json_info['id'],
|
||||
'display_id': display_id,
|
||||
'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
parse_duration,
|
||||
js_to_json,
|
||||
parse_duration,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
xpath_text,
|
||||
parse_duration,
|
||||
ExtractorError,
|
||||
parse_duration,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,12 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
ExtractorError,
|
||||
try_get,
|
||||
mimetype2ext
|
||||
)
|
||||
from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get
|
||||
|
||||
|
||||
class FancodeVodIE(InfoExtractor):
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
from .common import InfoExtractor
|
||||
from ..compat import compat_etree_fromstring
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue