diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c4d3e812e..4deee572f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -28,7 +28,6 @@ # PLEASE FOLLOW THE GUIDE BELOW ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests -- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) ### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 70769f967..fdfdebc65 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -53,7 +53,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi + run: python3 ./devscripts/install_deps.py --include test --include curl-cffi - name: Run tests continue-on-error: False run: | diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 24b34911f..3afb51a30 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -15,13 +15,13 @@ jobs: with: python-version: '3.8' - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev + run: python3 ./devscripts/install_deps.py --include test - name: Run tests run: | python3 -m yt_dlp -v || true python3 ./devscripts/run_tests.py core - flake8: - name: Linter + check: + name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: @@ -29,9 +29,11 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.8' - - name: Install flake8 - run: python3 ./devscripts/install_deps.py -o --include dev + - name: Install dev dependencies + run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors run: python3 ./devscripts/make_lazy_extractors.py - - name: Run flake8 - run: flake8 . + - name: Run ruff + run: ruff check --output-format github . + - name: Run autopep8 + run: autopep8 --diff . diff --git a/.gitignore b/.gitignore index 630c2e01f..db322c4f0 100644 --- a/.gitignore +++ b/.gitignore @@ -67,7 +67,7 @@ cookies # Python *.pyc *.pyo -.pytest_cache +.*_cache wine-py2exe/ py2exe.log build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..a821eeefb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: +- repo: local + hooks: + - id: linter + name: Apply linter fixes + entry: ruff check --fix . + language: system + types: [python] + require_serial: true + - id: format + name: Apply formatting fixes + entry: autopep8 --in-place . + language: system + types: [python] diff --git a/.pre-commit-hatch.yaml b/.pre-commit-hatch.yaml new file mode 100644 index 000000000..fb7d25e1d --- /dev/null +++ b/.pre-commit-hatch.yaml @@ -0,0 +1,9 @@ +repos: +- repo: local + hooks: + - id: fix + name: Apply code fixes + entry: hatch fmt + language: system + types: [python] + require_serial: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c94ec55a6..837b600e3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,18 +134,53 @@ ### Is the website primarily used for piracy? # DEVELOPER INSTRUCTIONS -Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation). +Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases), get them via [the other installation methods](README.md#installation) or directly run it using `python -m yt_dlp`. -To run yt-dlp as a developer, you don't need to build anything either. Simply execute +`yt-dlp` uses [`hatch`]() as a project management tool. +You can easily install it using [`pipx`]() via `pipx install hatch`, or else via `pip` or your package manager of choice. Make sure you are using at least version `1.10.0`, otherwise some functionality might not work as expected. - python3 -m yt_dlp +If you plan on contributing to `yt-dlp`, best practice is to start by running the following command: -To run all the available core tests, use: +```shell +$ hatch run setup +``` - python3 devscripts/run_tests.py +The above command will install a `pre-commit` hook so that required checks/fixes (linting, formatting) will run automatically before each commit. If any code needs to be linted or formatted, then the commit will be blocked and the necessary changes will be made; you should review all edits and re-commit the fixed version. + +After this you can use `hatch shell` to enable a virtual environment that has `yt-dlp` and its development dependencies installed. + +In addition, the following script commands can be used to run simple tasks such as linting or testing (without having to run `hatch shell` first): +* `hatch fmt`: Automatically fix linter violations and apply required code formatting changes + * See `hatch fmt --help` for more info +* `hatch test`: Run extractor or core tests + * See `hatch test --help` for more info See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. +While it is strongly recommended to use `hatch` for yt-dlp development, if you are unable to do so, alternatively you can manually create a virtual environment and use the following commands: + +```shell +# To only install development dependencies: +$ python -m devscripts.install_deps --include dev + +# Or, for an editable install plus dev dependencies: +$ python -m pip install -e ".[default,dev]" + +# To setup the pre-commit hook: +$ pre-commit install + +# To be used in place of `hatch test`: +$ python -m devscripts.run_tests + +# To be used in place of `hatch fmt`: +$ ruff check --fix . +$ autopep8 --in-place . + +# To only check code instead of applying fixes: +$ ruff check . +$ autopep8 --diff . +``` + If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile). @@ -165,12 +200,16 @@ ## Adding support for a new site 1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork) 1. Check out the source code with: - git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git + ```shell + $ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git + ``` 1. Start a new git branch with - cd yt-dlp - git checkout -b yourextractor + ```shell + $ cd yt-dlp + $ git checkout -b yourextractor + ``` 1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`: @@ -217,21 +256,27 @@ ## Adding support for a new site # TODO more properties (see yt_dlp/extractor/common.py) } ``` -1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. -1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all` +1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. Also note that when adding a parenthesized import group, the last import in the group must have a trailing comma in order for this formatting to be respected by our code formatter. +1. Run `hatch test YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all` 1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. 1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. -1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): +1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions), passes [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started) code checks and is properly formatted: - $ flake8 yt_dlp/extractor/yourextractor.py + ```shell + $ hatch fmt --check + ``` + + You can use `hatch fmt` to automatically fix problems. 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: - $ git add yt_dlp/extractor/_extractors.py - $ git add yt_dlp/extractor/yourextractor.py - $ git commit -m '[yourextractor] Add extractor' - $ git push origin yourextractor + ```shell + $ git add yt_dlp/extractor/_extractors.py + $ git add yt_dlp/extractor/yourextractor.py + $ git commit -m '[yourextractor] Add extractor' + $ git push origin yourextractor + ``` 1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 8b5d19a64..e0d1668ee 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -610,3 +610,24 @@ Offert4324 sta1us Tomoka1 trwstin +alexhuot1 +clienthax +DaPotato69 +emqi +hugohaa +imanoreotwe +JakeFinley96 +lostfictions +minamotorin +ocococococ +Podiumnoche +RasmusAntons +roeniss +shoxie007 +Szpachlarz +The-MAGI +TuxCoder +voidful +vtexier +WyohKnott +trueauracoral diff --git a/Changelog.md b/Changelog.md index 6cf08beab..267330208 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,127 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.05.27 + +#### Extractor changes +- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev) +- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral) +- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev) +- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly) + +### 2024.05.26 + +#### Core changes +- [Better warning when requested subs format not found](https://github.com/yt-dlp/yt-dlp/commit/7e4259dff0b681a3f0e8a930799ce0394328c86e) ([#9873](https://github.com/yt-dlp/yt-dlp/issues/9873)) by [DaPotato69](https://github.com/DaPotato69) +- [Merged with youtube-dl a08f2b7](https://github.com/yt-dlp/yt-dlp/commit/a4da9db87b6486b270c15dfa07ab5bfedc83f6bd) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- [Warn if lack of ffmpeg alters format selection](https://github.com/yt-dlp/yt-dlp/commit/96da9525043f78aca4544d01761b13b2140e9ae6) ([#9805](https://github.com/yt-dlp/yt-dlp/issues/9805)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **cookies** + - [Add `--cookies-from-browser` support for Whale](https://github.com/yt-dlp/yt-dlp/commit/dd9ad97b1fbdd36c086b8ba82328a4d954f78f8e) ([#9649](https://github.com/yt-dlp/yt-dlp/issues/9649)) by [roeniss](https://github.com/roeniss) + - [Get chrome session cookies with `--cookies-from-browser`](https://github.com/yt-dlp/yt-dlp/commit/f1f158976e38d38a260762accafe7bbe6d451151) ([#9747](https://github.com/yt-dlp/yt-dlp/issues/9747)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **windows**: [Improve shell quoting and tests](https://github.com/yt-dlp/yt-dlp/commit/64766459e37451b665c1464073c28361fbcf1c25) ([#9802](https://github.com/yt-dlp/yt-dlp/issues/9802)) by [Grub4K](https://github.com/Grub4K) (With fixes in [7e26bd5](https://github.com/yt-dlp/yt-dlp/commit/7e26bd53f9c5893518fde81dfd0079ec08dd841e)) + +#### Extractor changes +- [Add POST data hash to `--write-pages` filenames](https://github.com/yt-dlp/yt-dlp/commit/61b17437dc14a1c7e90ff48a6198df77828c6df4) ([#9879](https://github.com/yt-dlp/yt-dlp/issues/9879)) by [minamotorin](https://github.com/minamotorin) (With fixes in [c999bac](https://github.com/yt-dlp/yt-dlp/commit/c999bac02c5a4f755b2a82488a975e91c988ffd8) by [bashonly](https://github.com/bashonly)) +- [Make `_search_nextjs_data` non fatal](https://github.com/yt-dlp/yt-dlp/commit/3ee1194288981c4f2c4abd8315326de0c424d2ce) ([#8937](https://github.com/yt-dlp/yt-dlp/issues/8937)) by [Grub4K](https://github.com/Grub4K) +- **afreecatv**: live: [Add `cdn` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/315b3544296bb83012e20ee3af9d3cbf5600dd1c) ([#9666](https://github.com/yt-dlp/yt-dlp/issues/9666)) by [bashonly](https://github.com/bashonly) +- **alura**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc2879ecb05aaad36869609d154e4321362c1f63) ([#9658](https://github.com/yt-dlp/yt-dlp/issues/9658)) by [hugohaa](https://github.com/hugohaa) +- **artetv**: [Label forced subtitles](https://github.com/yt-dlp/yt-dlp/commit/7b5674949fd03a33b47b67b31d56a5adf1c48c91) ([#9945](https://github.com/yt-dlp/yt-dlp/issues/9945)) by [vtexier](https://github.com/vtexier) +- **bbc**: [Fix and extend extraction](https://github.com/yt-dlp/yt-dlp/commit/7975ddf245d22af034d5b983eeb1c5ec6c2ce053) ([#9705](https://github.com/yt-dlp/yt-dlp/issues/9705)) by [dirkf](https://github.com/dirkf), [kylegustavo](https://github.com/kylegustavo), [pukkandan](https://github.com/pukkandan) +- **bilibili**: [Fix `--geo-verification-proxy` support](https://github.com/yt-dlp/yt-dlp/commit/2338827072dacab0f15348b70aec8685feefc8d1) ([#9817](https://github.com/yt-dlp/yt-dlp/issues/9817)) by [fireattack](https://github.com/fireattack) +- **bilibilispacevideo** + - [Better error message](https://github.com/yt-dlp/yt-dlp/commit/06d52c87314e0bbc16c43c405090843885577b88) ([#9839](https://github.com/yt-dlp/yt-dlp/issues/9839)) by [fireattack](https://github.com/fireattack) + - [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4cc99d7b6cce8b39506ead01407445d576b63ee4) ([#9905](https://github.com/yt-dlp/yt-dlp/issues/9905)) by [c-basalt](https://github.com/c-basalt) +- **boosty**: [Add cookies support](https://github.com/yt-dlp/yt-dlp/commit/145dc6f6563e80d2da1b3e9aea2ffa795b71622c) ([#9522](https://github.com/yt-dlp/yt-dlp/issues/9522)) by [RasmusAntons](https://github.com/RasmusAntons) +- **brilliantpala**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/eead3bbc01f6529862bdad1f0b2adeabda4f006e) ([#9788](https://github.com/yt-dlp/yt-dlp/issues/9788)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canalalpha**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/00a9f2e1f7fa69499221f2e8dd73a08efeef79bc) ([#9675](https://github.com/yt-dlp/yt-dlp/issues/9675)) by [kclauhk](https://github.com/kclauhk) +- **cbc.ca**: player: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8bf48f3a8fa29587e7c73ef5a7710385a5ea725) ([#9866](https://github.com/yt-dlp/yt-dlp/issues/9866)) by [carusocr](https://github.com/carusocr) +- **cda**: [Fix age-gated web extraction](https://github.com/yt-dlp/yt-dlp/commit/6d8a53d870ff6795f509085bfbf3981417999038) ([#9939](https://github.com/yt-dlp/yt-dlp/issues/9939)) by [dirkf](https://github.com/dirkf), [emqi](https://github.com/emqi), [Podiumnoche](https://github.com/Podiumnoche), [Szpachlarz](https://github.com/Szpachlarz) +- **commonmistakes**: [Raise error on blob URLs](https://github.com/yt-dlp/yt-dlp/commit/98d71d8c5e5dab08b561ee6f137e968d2a004262) ([#9897](https://github.com/yt-dlp/yt-dlp/issues/9897)) by [seproDev](https://github.com/seproDev) +- **crunchyroll** + - [Always make metadata available](https://github.com/yt-dlp/yt-dlp/commit/cb2fb4a643949322adba561ca73bcba3221ec0c5) ([#9772](https://github.com/yt-dlp/yt-dlp/issues/9772)) by [bashonly](https://github.com/bashonly) + - [Fix auth and remove cookies support](https://github.com/yt-dlp/yt-dlp/commit/ff38a011d57b763f3a69bebd25a5dc9044a717ce) ([#9749](https://github.com/yt-dlp/yt-dlp/issues/9749)) by [bashonly](https://github.com/bashonly) + - [Fix stream extraction](https://github.com/yt-dlp/yt-dlp/commit/f2816634e3be88fe158b342ee33918de3c272a54) ([#10005](https://github.com/yt-dlp/yt-dlp/issues/10005)) by [bashonly](https://github.com/bashonly) + - [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/5904853ae5788509fdc4892cb7ecdfa9ae7f78e6) ([#9857](https://github.com/yt-dlp/yt-dlp/issues/9857)) by [bashonly](https://github.com/bashonly) +- **dangalplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0d067e77c3f5527946fb0c22ee1c7011994cba40) ([#10021](https://github.com/yt-dlp/yt-dlp/issues/10021)) by [bashonly](https://github.com/bashonly) +- **discoveryplus**: [Fix dmax.de and related extractors](https://github.com/yt-dlp/yt-dlp/commit/90d2da311bbb5dc06f385ee428c7e4590936e995) ([#10020](https://github.com/yt-dlp/yt-dlp/issues/10020)) by [bashonly](https://github.com/bashonly) +- **eplus**: [Handle URLs without videos](https://github.com/yt-dlp/yt-dlp/commit/351dc0bc334c4e1b5f00c152818c3ec0ed71f788) ([#9855](https://github.com/yt-dlp/yt-dlp/issues/9855)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **europarlwebstream**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/800a43983e5fb719526ce4cb3956216085c63268) ([#9647](https://github.com/yt-dlp/yt-dlp/issues/9647)) by [seproDev](https://github.com/seproDev), [voidful](https://github.com/voidful) +- **facebook**: [Fix DASH formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e3b42d8b1b8bcfff7ba146c19fc3f6f6ba843cea) ([#9734](https://github.com/yt-dlp/yt-dlp/issues/9734)) by [bashonly](https://github.com/bashonly) +- **godresource**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65e709d23530959075816e966c42179ad46e8e3b) ([#9629](https://github.com/yt-dlp/yt-dlp/issues/9629)) by [HobbyistDev](https://github.com/HobbyistDev) +- **googledrive**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/85ec2a337ac325cf6427cbafd56f0a034c1a5218) ([#9908](https://github.com/yt-dlp/yt-dlp/issues/9908)) by [WyohKnott](https://github.com/WyohKnott) +- **hearthisat**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5bbfdb7c999b22f1aeca0c3489c167d6eb73013b) ([#9949](https://github.com/yt-dlp/yt-dlp/issues/9949)) by [bohwaz](https://github.com/bohwaz), [seproDev](https://github.com/seproDev) +- **hytale**: [Use `CloudflareStreamIE` explicitly](https://github.com/yt-dlp/yt-dlp/commit/31b417e1d1ccc67d5c027bf8878f483dc34cb118) ([#9672](https://github.com/yt-dlp/yt-dlp/issues/9672)) by [llamasblade](https://github.com/llamasblade) +- **instagram**: [Support `/reels/` URLs](https://github.com/yt-dlp/yt-dlp/commit/06cb0638392b607b47d3c2ac48eb2ebecb0f060d) ([#9539](https://github.com/yt-dlp/yt-dlp/issues/9539)) by [amir16yp](https://github.com/amir16yp) +- **jiocinema**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1463945ae5fb05986a0bd1aa02e41d1a08d93a02) ([#10026](https://github.com/yt-dlp/yt-dlp/issues/10026)) by [bashonly](https://github.com/bashonly) +- **jiosaavn**: [Extract via API and fix playlists](https://github.com/yt-dlp/yt-dlp/commit/0c21c53885cf03f4040467ae8c44d7ff51016116) ([#9656](https://github.com/yt-dlp/yt-dlp/issues/9656)) by [bashonly](https://github.com/bashonly) +- **lci**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a2eebc76770fca91ffabeff658d560f716fec80) ([#10025](https://github.com/yt-dlp/yt-dlp/issues/10025)) by [ocococococ](https://github.com/ocococococ) +- **mixch**: [Extract comments](https://github.com/yt-dlp/yt-dlp/commit/b38018b781b062d5169d104ab430489aef8e7f1e) ([#9860](https://github.com/yt-dlp/yt-dlp/issues/9860)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **moviepilot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/296df0da1d38a44d34c99b60a18066c301774537) ([#9366](https://github.com/yt-dlp/yt-dlp/issues/9366)) by [panatexxa](https://github.com/panatexxa) +- **netease**: program: [Improve `--no-playlist` message](https://github.com/yt-dlp/yt-dlp/commit/73f12119b52d98281804b0c072b2ed6aa841ec88) ([#9488](https://github.com/yt-dlp/yt-dlp/issues/9488)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nfb**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0a1a8e3005f66c44bf67633dccd4df19c3fccd1a) ([#9650](https://github.com/yt-dlp/yt-dlp/issues/9650)) by [rrgomes](https://github.com/rrgomes) +- **ntslive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be7db1a5a8c483726c511c30ea4689cbb8b27962) ([#9641](https://github.com/yt-dlp/yt-dlp/issues/9641)) by [lostfictions](https://github.com/lostfictions) +- **orf**: on: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9) ([#9677](https://github.com/yt-dlp/yt-dlp/issues/9677)) by [TuxCoder](https://github.com/TuxCoder) +- **orftvthek**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/3779f2a307ba3ef1d28e107cdd71b221dfb4eb36) ([#10011](https://github.com/yt-dlp/yt-dlp/issues/10011)) by [seproDev](https://github.com/seproDev) +- **patreon** + - [Extract multiple embeds](https://github.com/yt-dlp/yt-dlp/commit/036e0d92c6052465673d459678322ea03e61483d) ([#9850](https://github.com/yt-dlp/yt-dlp/issues/9850)) by [bashonly](https://github.com/bashonly) + - [Fix Vimeo embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c9ce57d9bf51541da2381d99bc096a9d0ddf1f27) ([#9712](https://github.com/yt-dlp/yt-dlp/issues/9712)) by [bashonly](https://github.com/bashonly) +- **piapro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3ba8de62d61d782256f5c1e9939a0762039657de) ([#9311](https://github.com/yt-dlp/yt-dlp/issues/9311)) by [FinnRG](https://github.com/FinnRG), [seproDev](https://github.com/seproDev) +- **pornhub**: [Fix login by email address](https://github.com/yt-dlp/yt-dlp/commit/518c1afc1592cae3e4eb39dc646b5bc059333112) ([#9914](https://github.com/yt-dlp/yt-dlp/issues/9914)) by [feederbox826](https://github.com/feederbox826) +- **qub**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b54cccdcb892bca3e55993480d8b86f1c7e6da6) ([#7019](https://github.com/yt-dlp/yt-dlp/issues/7019)) by [alexhuot1](https://github.com/alexhuot1), [dirkf](https://github.com/dirkf) +- **reddit**: [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/82f4f4444e26daf35b7302c406fe2312f78f619e) ([#10006](https://github.com/yt-dlp/yt-dlp/issues/10006)) by [kclauhk](https://github.com/kclauhk) +- **soundcloud** + - [Add `formats` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/beaf832c7a9d57833f365ce18f6115b88071b296) ([#10004](https://github.com/yt-dlp/yt-dlp/issues/10004)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Extract `genres`](https://github.com/yt-dlp/yt-dlp/commit/231c2eacc41b06b65c63edf94c0d04768a5da607) ([#9821](https://github.com/yt-dlp/yt-dlp/issues/9821)) by [bashonly](https://github.com/bashonly) +- **taptap**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/63b569bc5e7d461753637a20ad84a575adee4c0a) ([#9776](https://github.com/yt-dlp/yt-dlp/issues/9776)) by [c-basalt](https://github.com/c-basalt) +- **tele5**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d) ([#10024](https://github.com/yt-dlp/yt-dlp/issues/10024)) by [bashonly](https://github.com/bashonly) +- **theatercomplextown**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/8056a3026ed6ec6a6d0ed56fdd7ebcd16e928341) ([#9754](https://github.com/yt-dlp/yt-dlp/issues/9754)) by [bashonly](https://github.com/bashonly) +- **tiktok** + - [Add `device_id` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/3584b8390bd21c0393a3079eeee71aed56a1c1d8) ([#9951](https://github.com/yt-dlp/yt-dlp/issues/9951)) by [bashonly](https://github.com/bashonly) + - [Extract all web formats](https://github.com/yt-dlp/yt-dlp/commit/4ccd73fea0f6f4be343e1ec7f22dd03799addcf8) ([#9960](https://github.com/yt-dlp/yt-dlp/issues/9960)) by [bashonly](https://github.com/bashonly) + - [Extract via mobile API only if extractor-arg is passed](https://github.com/yt-dlp/yt-dlp/commit/41ba4a808b597a3afed78c89675a30deb6844450) ([#9938](https://github.com/yt-dlp/yt-dlp/issues/9938)) by [bashonly](https://github.com/bashonly) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/eef1e9f44ff14c5e65b759bb1eafa3946cdaf719) ([#9961](https://github.com/yt-dlp/yt-dlp/issues/9961)) by [bashonly](https://github.com/bashonly) + - collection: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/119d41f27061d220d276a2d38cfc8d873437452a) ([#9986](https://github.com/yt-dlp/yt-dlp/issues/9986)) by [bashonly](https://github.com/bashonly), [imanoreotwe](https://github.com/imanoreotwe) + - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/347f13dd9bccc2b4db3ea25689410d45d8370ed4) ([#9661](https://github.com/yt-dlp/yt-dlp/issues/9661)) by [bashonly](https://github.com/bashonly) +- **tv5monde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6db96268c521e945d42649607db1574f5d92e082) ([#9143](https://github.com/yt-dlp/yt-dlp/issues/9143)) by [alard](https://github.com/alard), [seproDev](https://github.com/seproDev) +- **twitter** + - [Fix auth for x.com migration](https://github.com/yt-dlp/yt-dlp/commit/3e35aa32c74bc108375be8c8b6b3bfc90dfff1b4) ([#9952](https://github.com/yt-dlp/yt-dlp/issues/9952)) by [bashonly](https://github.com/bashonly) + - [Support x.com URLs](https://github.com/yt-dlp/yt-dlp/commit/4813173e4544f125d6f2afc31e600727d761b8dd) ([#9926](https://github.com/yt-dlp/yt-dlp/issues/9926)) by [bashonly](https://github.com/bashonly) +- **vk**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/df5c9e733aaba703cf285c0372b6d61629330c82) ([#9885](https://github.com/yt-dlp/yt-dlp/issues/9885)) by [seproDev](https://github.com/seproDev) +- **wrestleuniverse**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/c4853655cb9a793129280806af643de43c48f4d5) ([#9800](https://github.com/yt-dlp/yt-dlp/issues/9800)) by [bashonly](https://github.com/bashonly) +- **xiaohongshu**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2e9031605d87c469be9ce98dbbdf4960b727338) ([#9646](https://github.com/yt-dlp/yt-dlp/issues/9646)) by [HobbyistDev](https://github.com/HobbyistDev) +- **xvideos**: quickies: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b207d26f83fb8ab0ce56df74dff43ff583a3264f) ([#9834](https://github.com/yt-dlp/yt-dlp/issues/9834)) by [JakeFinley96](https://github.com/JakeFinley96) +- **youporn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/351368cb9a6731b886a58f5a10fd6b302bbe47be) ([#8827](https://github.com/yt-dlp/yt-dlp/issues/8827)) by [The-MAGI](https://github.com/The-MAGI) +- **youtube** + - [Add `mediaconnect` client](https://github.com/yt-dlp/yt-dlp/commit/cf212d0a331aba05c32117573f760cdf3af8c62f) ([#9546](https://github.com/yt-dlp/yt-dlp/issues/9546)) by [clienthax](https://github.com/clienthax) + - [Extract upload timestamp if available](https://github.com/yt-dlp/yt-dlp/commit/96a134dea6397a5f2131947c427aac52c8b4e677) ([#9856](https://github.com/yt-dlp/yt-dlp/issues/9856)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/8e15177b4113c355989881e4e030f695a9b59c3a) ([#9775](https://github.com/yt-dlp/yt-dlp/issues/9775)) by [bbilly1](https://github.com/bbilly1), [jakeogh](https://github.com/jakeogh), [minamotorin](https://github.com/minamotorin), [shoxie007](https://github.com/shoxie007) + - [Remove `android` from default clients](https://github.com/yt-dlp/yt-dlp/commit/12d8ea8246fa901de302ff5cc748caddadc82f41) ([#9553](https://github.com/yt-dlp/yt-dlp/issues/9553)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) +- **zenyandex**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c4b87dd885ee5391e5f481e7c8bd550a7c543623) ([#9813](https://github.com/yt-dlp/yt-dlp/issues/9813)) by [src-tinkerer](https://github.com/src-tinkerer) + +#### Networking changes +- [Add `extensions` attribute to `Response`](https://github.com/yt-dlp/yt-dlp/commit/bec9a59e8ec82c18e3bf9268eaa436793dd52e35) ([#9756](https://github.com/yt-dlp/yt-dlp/issues/9756)) by [bashonly](https://github.com/bashonly) +- **Request Handler** + - requests + - [Patch support for `requests` 2.32.2+](https://github.com/yt-dlp/yt-dlp/commit/3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0) ([#9992](https://github.com/yt-dlp/yt-dlp/issues/9992)) by [Grub4K](https://github.com/Grub4K) + - [Update to `requests` 2.32.0](https://github.com/yt-dlp/yt-dlp/commit/c36513f1be2ef3d3cec864accbffda1afaa06ffd) ([#9980](https://github.com/yt-dlp/yt-dlp/issues/9980)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- [Add `hatch`, `ruff`, `pre-commit` and improve dev docs](https://github.com/yt-dlp/yt-dlp/commit/e897bd8292a41999cf51dba91b390db5643c72db) ([#7409](https://github.com/yt-dlp/yt-dlp/issues/7409)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **build** + - [Migrate `linux_exe` to static musl builds](https://github.com/yt-dlp/yt-dlp/commit/ac817bc83efd939dca3e40c4b527d0ccfc77172b) ([#9811](https://github.com/yt-dlp/yt-dlp/issues/9811)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Normalize `curl_cffi` group to `curl-cffi`](https://github.com/yt-dlp/yt-dlp/commit/02483bea1c4dbe1bace8ca4d19700104fbb8a00f) ([#9698](https://github.com/yt-dlp/yt-dlp/issues/9698)) by [bashonly](https://github.com/bashonly) (With fixes in [89f535e](https://github.com/yt-dlp/yt-dlp/commit/89f535e2656964b4061c25a7739d4d6ba0a30568)) + - [Run `macos_legacy` job on `macos-12`](https://github.com/yt-dlp/yt-dlp/commit/1a366403d9c26b992faa77e00f4d02ead57559e3) ([#9804](https://github.com/yt-dlp/yt-dlp/issues/9804)) by [bashonly](https://github.com/bashonly) + - [`macos` job requires `setuptools<70`](https://github.com/yt-dlp/yt-dlp/commit/78c57cc0e0998b8ed90e4306f410aa4be4115cd7) ([#9993](https://github.com/yt-dlp/yt-dlp/issues/9993)) by [bashonly](https://github.com/bashonly) +- **cleanup** + - [Remove questionable extractors](https://github.com/yt-dlp/yt-dlp/commit/01395a34345d1c6ba1b73ca92f94dd200dc45341) ([#9911](https://github.com/yt-dlp/yt-dlp/issues/9911)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [5c019f6](https://github.com/yt-dlp/yt-dlp/commit/5c019f6328ad40d66561eac3c4de0b3cd070d0f6), [ae2af11](https://github.com/yt-dlp/yt-dlp/commit/ae2af1104f80caf2f47544763a33db2c17a3e1de) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **test** + - [Add HTTP proxy tests](https://github.com/yt-dlp/yt-dlp/commit/3c7a287e281d9f9a353dce8902ff78a84c24a040) ([#9578](https://github.com/yt-dlp/yt-dlp/issues/9578)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix connect timeout test](https://github.com/yt-dlp/yt-dlp/commit/53b4d44f55cca66ac33dab092ef2a30b1164b684) ([#9906](https://github.com/yt-dlp/yt-dlp/issues/9906)) by [coletdjnz](https://github.com/coletdjnz) + ### 2024.04.09 #### Important changes diff --git a/Makefile b/Makefile index cef4bc6cb..e1de7f3e9 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ clean-dist: yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS clean-cache: find . \( \ - -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ + -type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ \) -prune -exec rm -rf {} \; completion-bash: completions/bash/yt-dlp @@ -70,14 +70,15 @@ uninstall: rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish codetest: - flake8 . + ruff check . + autopep8 --diff . test: - $(PYTHON) -m pytest + $(PYTHON) -m pytest -Werror $(MAKE) codetest offlinetest: codetest - $(PYTHON) -m pytest -k "not download" + $(PYTHON) -m pytest -Werror -m "not download" CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort CODE_FOLDERS != $(CODE_FOLDERS_CMD) @@ -151,7 +152,7 @@ yt-dlp.tar.gz: all --exclude '*.pyo' \ --exclude '*~' \ --exclude '__pycache__' \ - --exclude '.pytest_cache' \ + --exclude '.*_cache' \ --exclude '.git' \ -- \ README.md supportedsites.md Changelog.md LICENSE \ diff --git a/README.md b/README.md index 887cfde23..e8cd6d3a0 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,6 @@ #### Alternatives [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`
([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary -[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update) [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) @@ -170,7 +169,7 @@ # To update to nightly from stable executable/binary: yt-dlp --update-to nightly # To install nightly with pip: -python3 -m pip install -U --pre yt-dlp[default] +python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES @@ -202,7 +201,7 @@ #### Impersonation The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) - * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]` + * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds @@ -402,6 +401,9 @@ ## Network Options: --impersonate CLIENT[:OS] Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. Pass --impersonate="" to impersonate any client. + Note that forcing impersonation for all + requests may have a detrimental impact on + download speed and stability --list-impersonate-targets List available clients to impersonate. -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 @@ -666,16 +668,17 @@ ## Filesystem Options: The name of the browser to load cookies from. Currently supported browsers are: brave, chrome, chromium, edge, firefox, - opera, safari, vivaldi, whale. Optionally, the - KEYRING used for decrypting Chromium cookies - on Linux, the name/path of the PROFILE to - load cookies from, and the CONTAINER name - (if Firefox) ("none" for no container) can - be given with their respective seperators. - By default, all containers of the most - recently accessed profile are used. - Currently supported keyrings are: basictext, - gnomekeyring, kwallet, kwallet5, kwallet6 + opera, safari, vivaldi, whale. Optionally, + the KEYRING used for decrypting Chromium + cookies on Linux, the name/path of the + PROFILE to load cookies from, and the + CONTAINER name (if Firefox) ("none" for no + container) can be given with their + respective seperators. By default, all + containers of the most recently accessed + profile are used. Currently supported + keyrings are: basictext, gnomekeyring, + kwallet, kwallet5, kwallet6 --no-cookies-from-browser Do not load cookies from browser (default) --cache-dir DIR Location in the filesystem where yt-dlp can store some downloaded information (such as @@ -1751,7 +1754,7 @@ # Replace all spaces and "_" in title and uploader with a `-` # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` becomes `youtube:player_client"` @@ -1835,6 +1838,9 @@ #### nhkradirulive (NHK らじる★らじる LIVE) #### nflplusreplay * `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default +#### jiocinema +* `refresh_token`: The `refreshToken` UUID from browser local storage can be passed to extend the life of your login session when logging in with `token` as username and the `accessToken` from browser local storage as password + #### jiosaavn * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320` @@ -2120,7 +2126,7 @@ # CHANGES FROM YOUTUBE-DL ### New features -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@a08f2b7**](https://github.com/ytdl-org/youtube-dl/commit/a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API @@ -2330,6 +2336,7 @@ #### No longer supported --write-annotations No supported site has annotations now --no-write-annotations Default --compat-options seperate-video-versions No longer needed + --compat-options no-youtube-prefer-utc-upload-date No longer supported #### Removed These options were deprecated since 2014 and have now been entirely removed diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 046060cb2..86e8ec2f9 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -147,5 +147,27 @@ "action": "add", "when": "9590cc6b4768e190183d7d071a6c78170889116a", "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly." + }, + { + "action": "change", + "when": "41ba4a808b597a3afed78c89675a30deb6844450", + "short": "[ie/tiktok] Extract via mobile API only if extractor-arg is passed (#9938)", + "authors": ["bashonly"] + }, + { + "action": "remove", + "when": "6e36d17f404556f0e3a43f441c477a71a91877d9" + }, + { + "action": "change", + "when": "beaf832c7a9d57833f365ce18f6115b88071b296", + "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)", + "authors": ["bashonly", "Grub4K"] + }, + { + "action": "change", + "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6", + "short": "[cleanup] Misc (#9765)", + "authors": ["bashonly", "Grub4K", "seproDev"] } ] diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py index d33fc637c..d29250545 100755 --- a/devscripts/install_deps.py +++ b/devscripts/install_deps.py @@ -42,17 +42,25 @@ def parse_args(): def main(): args = parse_args() project_table = parse_toml(read_file(args.input))['project'] + recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P[\w-]+)\]') optional_groups = project_table['optional-dependencies'] excludes = args.exclude or [] + def yield_deps(group): + for dep in group: + if mobj := recursive_pattern.fullmatch(dep): + yield from optional_groups.get(mobj.group('group_name'), []) + else: + yield dep + targets = [] if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group targets.extend(project_table['dependencies']) if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group - targets.extend(optional_groups['default']) + targets.extend(yield_deps(optional_groups['default'])) for include in filter(None, map(optional_groups.get, args.include or [])): - targets.extend(include) + targets.extend(yield_deps(include)) targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes] diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat deleted file mode 100644 index 57b1f4bf4..000000000 --- a/devscripts/run_tests.bat +++ /dev/null @@ -1,4 +0,0 @@ -@echo off - ->&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead -python %~dp0run_tests.py %~1 diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py index 6d638a974..c605aa62c 100755 --- a/devscripts/run_tests.py +++ b/devscripts/run_tests.py @@ -4,6 +4,7 @@ import functools import os import re +import shlex import subprocess import sys from pathlib import Path @@ -18,6 +19,8 @@ def parse_args(): 'test', help='a extractor tests, or one of "core" or "download"', nargs='*') parser.add_argument( '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION') + parser.add_argument( + '--pytest-args', help='arguments to passthrough to pytest') return parser.parse_args() @@ -26,15 +29,16 @@ def run_tests(*tests, pattern=None, ci=False): run_download = 'download' in tests tests = list(map(fix_test_name, tests)) - arguments = ['pytest', '-Werror', '--tb=short'] + pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '') + arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)] if ci: arguments.append('--color=yes') + if pattern: + arguments.extend(['-k', pattern]) if run_core: arguments.extend(['-m', 'not download']) elif run_download: arguments.extend(['-m', 'download']) - elif pattern: - arguments.extend(['-k', pattern]) else: arguments.extend( f'test/test_download.py::TestDownload::test_{test}' for test in tests) @@ -46,13 +50,13 @@ def run_tests(*tests, pattern=None, ci=False): pass arguments = [sys.executable, '-Werror', '-m', 'unittest'] + if pattern: + arguments.extend(['-k', pattern]) if run_core: print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True) return 1 elif run_download: arguments.append('test.test_download') - elif pattern: - arguments.extend(['-k', pattern]) else: arguments.extend( f'test.test_download.TestDownload.test_{test}' for test in tests) diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh deleted file mode 100755 index 123ceb1ee..000000000 --- a/devscripts/run_tests.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env sh - ->&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead' -python3 devscripts/run_tests.py "$1" diff --git a/pyinst.py b/pyinst.py deleted file mode 100755 index 4a8ed2d34..000000000 --- a/pyinst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import warnings - -from bundle.pyinstaller import main - -warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. ' - 'Use `bundle.pyinstaller` instead')) - -if __name__ == '__main__': - main() diff --git a/pyproject.toml b/pyproject.toml index 8e3bce4bf..96cb368b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,9 +66,16 @@ build = [ "wheel", ] dev = [ - "flake8", - "isort", - "pytest", + "pre-commit", + "yt-dlp[static-analysis]", + "yt-dlp[test]", +] +static-analysis = [ + "autopep8~=2.0", + "ruff~=0.4.4", +] +test = [ + "pytest~=8.1", ] pyinstaller = [ "pyinstaller>=6.3; sys_platform!='darwin'", @@ -126,3 +133,146 @@ artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] [tool.hatch.version] path = "yt_dlp/version.py" pattern = "_pkg_version = '(?P[^']+)'" + +[tool.hatch.envs.default] +features = ["curl-cffi", "default"] +dependencies = ["pre-commit"] +path = ".venv" +installer = "uv" + +[tool.hatch.envs.default.scripts] +setup = "pre-commit install --config .pre-commit-hatch.yaml" +yt-dlp = "python -Werror -Xdev -m yt_dlp {args}" + +[tool.hatch.envs.hatch-static-analysis] +detached = true +features = ["static-analysis"] +dependencies = [] # override hatch ruff version +config-path = "pyproject.toml" + +[tool.hatch.envs.hatch-static-analysis.scripts] +format-check = "autopep8 --diff {args:.}" +format-fix = "autopep8 --in-place {args:.}" +lint-check = "ruff check {args:.}" +lint-fix = "ruff check --fix {args:.}" + +[tool.hatch.envs.hatch-test] +features = ["test"] +dependencies = [ + "pytest-randomly~=3.15", + "pytest-rerunfailures~=14.0", + "pytest-xdist[psutil]~=3.5", +] + +[tool.hatch.envs.hatch-test.scripts] +run = "python -m devscripts.run_tests {args}" +run-cov = "echo Code coverage not implemented && exit 1" + +[[tool.hatch.envs.hatch-test.matrix]] +python = [ + "3.8", + "3.9", + "3.10", + "3.11", + "3.12", + "pypy3.8", + "pypy3.9", + "pypy3.10", +] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +ignore = [ + "E402", # module level import not at top of file + "E501", # line too long + "E731", # do not assign a lambda expression, use a def + "E741", # ambiguous variable name +] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # import order +] + +[tool.ruff.lint.per-file-ignores] +"devscripts/lazy_load_template.py" = ["F401"] +"!yt_dlp/extractor/**.py" = ["I"] + +[tool.ruff.lint.isort] +known-first-party = [ + "bundle", + "devscripts", + "test", +] +relative-imports-order = "closest-to-furthest" + +[tool.autopep8] +max_line_length = 120 +recursive = true +exit-code = true +jobs = 0 +select = [ + "E101", + "E112", + "E113", + "E115", + "E116", + "E117", + "E121", + "E122", + "E123", + "E124", + "E125", + "E126", + "E127", + "E128", + "E129", + "E131", + "E201", + "E202", + "E203", + "E211", + "E221", + "E222", + "E223", + "E224", + "E225", + "E226", + "E227", + "E228", + "E231", + "E241", + "E242", + "E251", + "E252", + "E261", + "E262", + "E265", + "E266", + "E271", + "E272", + "E273", + "E274", + "E275", + "E301", + "E302", + "E303", + "E304", + "E305", + "E306", + "E502", + "E701", + "E702", + "E704", + "W391", + "W504", +] + +[tool.pytest.ini_options] +addopts = "-ra -v --strict-markers" +markers = [ + "download", +] diff --git a/setup.cfg b/setup.cfg index aeb4cee58..340cc3b4d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,12 +14,6 @@ remove-duplicate-keys = true remove-unused-variables = true -[tool:pytest] -addopts = -ra -v --strict-markers -markers = - download - - [tox:tox] skipsdist = true envlist = py{38,39,310,311,312},pypy{38,39,310} diff --git a/setup.py b/setup.py deleted file mode 100755 index 8d1e6d10b..000000000 --- a/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import warnings - - -if sys.argv[1:2] == ['py2exe']: - warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. ' - 'Use `bundle.py2exe` instead')) - - import bundle.py2exe - - bundle.py2exe.main() - -elif 'build_lazy_extractors' in sys.argv: - warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. ' - 'Use `devscripts.make_lazy_extractors` instead')) - - import subprocess - - os.chdir(sys.path[0]) - print('running build_lazy_extractors') - subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py']) - -else: - - print( - 'ERROR: Building by calling `setup.py` is deprecated. ' - 'Use a build frontend like `build` instead. ', - 'Refer to https://build.pypa.io for more info', file=sys.stderr) - sys.exit(1) diff --git a/supportedsites.md b/supportedsites.md index ba77c0feb..387395613 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -14,7 +14,6 @@ # Supported sites - **6play** - **7plus** - **8tracks** - - **91porn** - **9c9media** - **9gag**: 9GAG - **9News** @@ -220,7 +219,7 @@ # Supported sites - **BusinessInsider** - **BuzzFeed** - **BYUtv**: (**Currently broken**) - - **CableAV** + - **CaffeineTV** - **Callin** - **Caltrans** - **CAM4** @@ -333,6 +332,8 @@ # Supported sites - **DailyWirePodcast** - **damtomo:record** - **damtomo:video** + - **dangalplay**: [*dangalplay*](## "netrc machine") + - **dangalplay:season**: [*dangalplay*](## "netrc machine") - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -396,7 +397,6 @@ # Supported sites - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - - **Einthusan** - **eitb.tv** - **ElementorEmbed** - **Elonet** @@ -498,6 +498,7 @@ # Supported sites - **GameStar** - **Gaskrank** - **Gazeta**: (**Currently broken**) + - **GBNews**: GB News clips, features and live streams - **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**) - **GediDigital** - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") @@ -527,6 +528,7 @@ # Supported sites - **GMANetworkVideo** - **Go** - **GoDiscovery** + - **GodResource** - **GodTube**: (**Currently broken**) - **Gofile** - **Golem** @@ -630,11 +632,11 @@ # Supported sites - **iwara:user**: [*iwara*](## "netrc machine") - **Ixigua** - **Izlesene** - - **Jable** - - **JablePlaylist** - **Jamendo** - **JamendoAlbum** - **JeuxVideo**: (**Currently broken**) + - **jiocinema**: [*jiocinema*](## "netrc machine") + - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** - **jiosaavn:playlist** - **jiosaavn:song** @@ -974,6 +976,7 @@ # Supported sites - **NRKTVSeason** - **NRKTVSeries** - **NRLTV**: (**Currently broken**) + - **nts.live** - **ntv.ru** - **NubilesPorn**: [*nubiles-porn*](## "netrc machine") - **nuum:live** @@ -1015,7 +1018,6 @@ # Supported sites - **orf:on** - **orf:podcast** - **orf:radio** - - **orf:tvthek**: ORF TVthek - **OsnatelTV**: [*osnateltv*](## "netrc machine") - **OsnatelTVLive**: [*osnateltv*](## "netrc machine") - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine") @@ -1394,6 +1396,10 @@ # Supported sites - **SztvHu** - **t-online.de**: (**Currently broken**) - **Tagesschau**: (**Currently broken**) + - **TapTapApp** + - **TapTapAppIntl** + - **TapTapMoment** + - **TapTapPostIntl** - **Tass**: (**Currently broken**) - **TBS** - **TBSJPEpisode** @@ -1412,7 +1418,7 @@ # Supported sites - **TedSeries** - **TedTalk** - **Tele13** - - **Tele5**: (**Currently broken**) + - **Tele5** - **TeleBruxelles** - **TelecaribePlay** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es @@ -1452,11 +1458,12 @@ # Supported sites - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** + - **tiktok:collection** - **tiktok:effect**: (**Currently broken**) - **tiktok:live** - **tiktok:sound**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**) - - **tiktok:user**: (**Currently broken**) + - **tiktok:user** - **TLC** - **TMZ** - **TNAFlix** @@ -1501,7 +1508,7 @@ # Supported sites - **tv2play.hu** - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - - **TV5MondePlus**: TV5MONDE+ + - **TV5MONDE** - **tv5unis** - **tv5unis:video** - **tv8.it** @@ -1639,8 +1646,6 @@ # Supported sites - **voicy**: (**Currently broken**) - **voicy:channel**: (**Currently broken**) - **VolejTV** - - **Voot**: [*voot*](## "netrc machine") (**Currently broken**) - - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**) - **VoxMedia** - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl @@ -1715,10 +1720,10 @@ # Supported sites - **wykop:​post:comment** - **Xanimu** - **XboxClips** - - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing - **XHamster** - **XHamsterEmbed** - **XHamsterUser** + - **XiaoHongShu**: 小红书 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - **xinpianchang**: xinpianchang.com (**Currently broken**) @@ -1749,8 +1754,12 @@ # Supported sites - **YouNowLive** - **YouNowMoment** - **YouPorn** - - **YourPorn** - - **YourUpload** + - **YouPornCategory**: YouPorn category, with sorting, filtering and pagination + - **YouPornChannel**: YouPorn channel, with sorting and pagination + - **YouPornCollection**: YouPorn collection (user playlist), with sorting and pagination + - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination + - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination + - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - **youtube**: YouTube - **youtube:clip** - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c633ce3e4..744587e45 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1912,7 +1912,7 @@ def test_search_nextjs_data(self): self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {}) self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None) self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {}) - with self.assertRaises(DeprecationWarning): + with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index c1d7c53f5..1b21fe78e 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -93,6 +93,7 @@ class SSLTransport(urllib3.util.ssltransport.SSLTransport): This allows us to chain multiple TLS connections. """ + def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False): self.incoming = ssl.MemoryBIO() self.outgoing = ssl.MemoryBIO() diff --git a/test/test_utils.py b/test/test_utils.py index f359c5b4f..038a56b4a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -5,6 +5,7 @@ import sys import unittest import warnings +import datetime as dt sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -27,6 +28,7 @@ ExtractorError, InAdvancePagedList, LazyList, + NO_DEFAULT, OnDemandPagedList, Popen, age_restricted, @@ -773,6 +775,11 @@ def test_encode_compat_str(self): def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None) + # default does not override timezone in date_str + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9dfa28c4b..e9cd38a65 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1,4 +1,5 @@ # flake8: noqa: F401 +# isort: off from .youtube import ( # Youtube is moved to the top to improve performance YoutubeIE, @@ -24,6 +25,8 @@ YoutubeConsentRedirectIE, ) +# isort: on + from .abc import ( ABCIE, ABCIViewIE, @@ -43,27 +46,33 @@ ) from .academicearth import AcademicEarthCourseIE from .acast import ( - ACastIE, ACastChannelIE, + ACastIE, +) +from .acfun import ( + AcFunBangumiIE, + AcFunVideoIE, +) +from .adn import ( + ADNIE, + ADNSeasonIE, ) -from .acfun import AcFunVideoIE, AcFunBangumiIE -from .adn import ADNIE, ADNSeasonIE from .adobeconnect import AdobeConnectIE from .adobetv import ( + AdobeTVChannelIE, AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, - AdobeTVChannelIE, AdobeTVVideoIE, ) from .adultswim import AdultSwimIE from .aenetworks import ( - AENetworksIE, AENetworksCollectionIE, + AENetworksIE, AENetworksShowIE, - HistoryTopicIE, - HistoryPlayerIE, BiographyIE, + HistoryPlayerIE, + HistoryTopicIE, ) from .aeonco import AeonCoIE from .afreecatv import ( @@ -79,77 +88,85 @@ ) from .airtv import AirTVIE from .aitube import AitubeKZVideoIE +from .aliexpress import AliExpressLiveIE from .aljazeera import AlJazeeraIE +from .allocine import AllocineIE from .allstar import ( AllstarIE, AllstarProfileIE, ) from .alphaporno import AlphaPornoIE +from .alsace20tv import ( + Alsace20TVEmbedIE, + Alsace20TVIE, +) from .altcensored import ( - AltCensoredIE, AltCensoredChannelIE, + AltCensoredIE, ) from .alura import ( + AluraCourseIE, AluraIE, - AluraCourseIE ) from .amadeustv import AmadeusTVIE from .amara import AmaraIE -from .amcnetworks import AMCNetworksIE from .amazon import ( - AmazonStoreIE, AmazonReviewsIE, + AmazonStoreIE, ) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, AmazonMiniTVSeriesIE, ) +from .amcnetworks import AMCNetworksIE from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) from .anchorfm import AnchorFMEpisodeIE from .angel import AngelIE +from .antenna import ( + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, + AntennaGrWatchIE, +) from .anvato import AnvatoIE from .aol import AolIE -from .allocine import AllocineIE -from .aliexpress import AliExpressLiveIE -from .alsace20tv import ( - Alsace20TVIE, - Alsace20TVEmbedIE, -) from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE +from .applepodcasts import ApplePodcastsIE from .appletrailers import ( AppleTrailersIE, AppleTrailersSectionIE, ) -from .applepodcasts import ApplePodcastsIE from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, ) from .arcpublishing import ArcPublishingIE -from .arkena import ArkenaIE from .ard import ( + ARDIE, ARDBetaMediathekIE, ARDMediathekCollectionIE, - ARDIE, ) +from .arkena import ArkenaIE +from .arnes import ArnesIE from .art19 import ( Art19IE, Art19ShowIE, ) from .arte import ( - ArteTVIE, - ArteTVEmbedIE, - ArteTVPlaylistIE, ArteTVCategoryIE, + ArteTVEmbedIE, + ArteTVIE, + ArteTVPlaylistIE, +) +from .asobichannel import ( + AsobiChannelIE, + AsobiChannelTagURLIE, ) -from .arnes import ArnesIE -from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE from .asobistage import AsobiStageIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE @@ -160,57 +177,60 @@ AudiodraftCustomIE, AudiodraftGenericIE, ) -from .audiomack import AudiomackIE, AudiomackAlbumIE +from .audiomack import ( + AudiomackAlbumIE, + AudiomackIE, +) from .audius import ( AudiusIE, - AudiusTrackIE, AudiusPlaylistIE, AudiusProfileIE, + AudiusTrackIE, ) from .awaan import ( AWAANIE, - AWAANVideoIE, AWAANLiveIE, AWAANSeasonIE, + AWAANVideoIE, ) from .axs import AxsIE from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( - BanByeIE, BanByeChannelIE, + BanByeIE, ) from .bandaichannel import BandaiChannelIE from .bandcamp import ( - BandcampIE, BandcampAlbumIE, - BandcampWeeklyIE, + BandcampIE, BandcampUserIE, + BandcampWeeklyIE, ) from .bannedvideo import BannedVideoIE from .bbc import ( - BBCCoUkIE, + BBCIE, BBCCoUkArticleIE, + BBCCoUkIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE, - BBCIE, ) +from .beatbump import ( + BeatBumpPlaylistIE, + BeatBumpVideoIE, +) +from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE -from .beatbump import ( - BeatBumpVideoIE, - BeatBumpPlaylistIE, -) -from .beatport import BeatportIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE from .bfmtv import ( BFMTVIE, - BFMTVLiveIE, BFMTVArticleIE, + BFMTVLiveIE, ) from .bibeltv import ( BibelTVLiveIE, @@ -221,37 +241,37 @@ from .bigo import BigoIE from .bild import BildIE from .bilibili import ( - BiliBiliIE, + BilibiliAudioAlbumIE, + BilibiliAudioIE, BiliBiliBangumiIE, - BiliBiliBangumiSeasonIE, BiliBiliBangumiMediaIE, + BiliBiliBangumiSeasonIE, + BilibiliCategoryIE, BilibiliCheeseIE, BilibiliCheeseSeasonIE, - BiliBiliSearchIE, - BilibiliCategoryIE, - BilibiliAudioIE, - BilibiliAudioAlbumIE, - BiliBiliPlayerIE, - BilibiliSpaceVideoIE, - BilibiliSpaceAudioIE, BilibiliCollectionListIE, - BilibiliSeriesListIE, BilibiliFavoritesListIE, - BilibiliWatchlaterIE, + BiliBiliIE, + BiliBiliPlayerIE, BilibiliPlaylistIE, + BiliBiliSearchIE, + BilibiliSeriesListIE, + BilibiliSpaceAudioIE, + BilibiliSpaceVideoIE, + BilibiliWatchlaterIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( - BitChuteIE, BitChuteChannelIE, + BitChuteIE, ) from .blackboardcollaborate import BlackboardCollaborateIE from .bleacherreport import ( - BleacherReportIE, BleacherReportCMSIE, + BleacherReportIE, ) from .blerp import BlerpIE from .blogger import BloggerIE @@ -264,68 +284,69 @@ from .boxcast import BoxCastVideoIE from .bpb import BpbIE from .br import BRIE -from .bravotv import BravoTVIE from .brainpop import ( - BrainPOPIE, - BrainPOPJrIE, BrainPOPELLIE, BrainPOPEspIE, BrainPOPFrIE, + BrainPOPIE, BrainPOPIlIE, + BrainPOPJrIE, ) +from .bravotv import BravoTVIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) from .brilliantpala import ( - BrilliantpalaElearnIE, BrilliantpalaClassesIE, + BrilliantpalaElearnIE, ) -from .businessinsider import BusinessInsiderIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE +from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE +from .caffeinetv import CaffeineTVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE from .camdemy import ( + CamdemyFolderIE, CamdemyIE, - CamdemyFolderIE ) from .camfm import ( CamFMEpisodeIE, - CamFMShowIE + CamFMShowIE, ) from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE -from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .canalplus import CanalplusIE from .caracoltv import CaracolTvPlayIE from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, + CBCGemIE, + CBCGemLiveIE, + CBCGemPlaylistIE, CBCPlayerIE, CBCPlayerPlaylistIE, - CBCGemIE, - CBCGemPlaylistIE, - CBCGemLiveIE, ) from .cbs import ( CBSIE, ParamountPressExpressIE, ) from .cbsnews import ( + CBSLocalArticleIE, + CBSLocalIE, + CBSLocalLiveIE, CBSNewsEmbedIE, CBSNewsIE, - CBSLocalIE, - CBSLocalArticleIE, - CBSLocalLiveIE, CBSNewsLiveIE, CBSNewsLiveVideoIE, ) @@ -354,12 +375,12 @@ from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE from .cineverse import ( - CineverseIE, CineverseDetailsIE, + CineverseIE, ) from .ciscolive import ( - CiscoLiveSessionIE, CiscoLiveSearchIE, + CiscoLiveSessionIE, ) from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE @@ -372,16 +393,13 @@ from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import ( - CNBCVideoIE, -) +from .cnbc import CNBCVideoIE from .cnn import ( CNNIE, - CNNBlogsIE, CNNArticleIE, + CNNBlogsIE, CNNIndonesiaIE, ) -from .coub import CoubIE from .comedycentral import ( ComedyCentralIE, ComedyCentralTVIE, @@ -399,44 +417,48 @@ from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .coub import CoubIE +from .cozytv import CozyTVIE from .cpac import ( CPACIE, CPACPlaylistIE, ) -from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( - CrowdBunkerIE, CrowdBunkerChannelIE, + CrowdBunkerIE, ) from .crtvg import CrtvgIE from .crunchyroll import ( + CrunchyrollArtistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, CrunchyrollMusicIE, - CrunchyrollArtistIE, ) -from .cspan import CSpanIE, CSpanCongressIE +from .cspan import ( + CSpanCongressIE, + CSpanIE, +) from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( - CuriosityStreamIE, CuriosityStreamCollectionsIE, + CuriosityStreamIE, CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .cybrary import ( + CybraryCourseIE, CybraryIE, - CybraryCourseIE ) from .dacast import ( - DacastVODIE, DacastPlaylistIE, + DacastVODIE, ) from .dailymail import DailyMailIE from .dailymotion import ( @@ -453,9 +475,13 @@ DamtomoRecordIE, DamtomoVideoIE, ) +from .dangalplay import ( + DangalPlayIE, + DangalPlaySeasonIE, +) from .daum import ( - DaumIE, DaumClipIE, + DaumIE, DaumPlaylistIE, DaumUserIE, ) @@ -463,49 +489,69 @@ from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import ( - DeezerPlaylistIE, DeezerAlbumIE, + DeezerPlaylistIE, ) from .democracynow import DemocracynowIE from .detik import DetikEmbedIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE, +) +from .dfb import DFBIE +from .dhm import DHMIE +from .digitalconcerthall import DigitalConcertHallIE +from .digiteka import DigitekaIE +from .discogs import DiscogsReleasePlaylistIE +from .discovery import DiscoveryIE +from .disney import DisneyIE +from .dispeak import DigitallySpeakingIE from .dlf import ( DLFIE, DLFCorpusIE, ) -from .dfb import DFBIE -from .dhm import DHMIE +from .dlive import ( + DLiveStreamIE, + DLiveVODIE, +) from .douyutv import ( DouyuShowIE, DouyuTVIE, ) from .dplay import ( - DPlayIE, - DiscoveryPlusIE, - HGTVDeIE, - GoDiscoveryIE, - TravelChannelIE, - CookingChannelIE, - HGTVUsaIE, - FoodNetworkIE, - InvestigationDiscoveryIE, - DestinationAmericaIE, - AmHistoryChannelIE, - ScienceChannelIE, - DIYNetworkIE, - DiscoveryLifeIE, - AnimalPlanetIE, TLCIE, - MotorTrendIE, - MotorTrendOnDemandIE, - DiscoveryPlusIndiaIE, + AmHistoryChannelIE, + AnimalPlanetIE, + CookingChannelIE, + DestinationAmericaIE, + DiscoveryLifeIE, DiscoveryNetworksDeIE, + DiscoveryPlusIE, + DiscoveryPlusIndiaIE, + DiscoveryPlusIndiaShowIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, - DiscoveryPlusIndiaShowIE, + DIYNetworkIE, + DPlayIE, + FoodNetworkIE, GlobalCyclingNetworkPlusIE, + GoDiscoveryIE, + HGTVDeIE, + HGTVUsaIE, + InvestigationDiscoveryIE, + MotorTrendIE, + MotorTrendOnDemandIE, + ScienceChannelIE, + TravelChannelIE, ) -from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE +from .dreisat import DreiSatIE +from .drooble import DroobleIE +from .dropbox import DropboxIE +from .dropout import ( + DropoutIE, + DropoutSeasonIE, +) from .drtuber import DrTuberIE from .drtv import ( DRTVIE, @@ -514,32 +560,21 @@ DRTVSeriesIE, ) from .dtube import DTubeIE -from .dvtv import DVTVIE from .duboku import ( DubokuIE, - DubokuPlaylistIE + DubokuPlaylistIE, ) from .dumpert import DumpertIE -from .deuxm import ( - DeuxMIE, - DeuxMNewsIE -) -from .digitalconcerthall import DigitalConcertHallIE -from .discogs import DiscogsReleasePlaylistIE -from .discovery import DiscoveryIE -from .disney import DisneyIE -from .dispeak import DigitallySpeakingIE -from .dropbox import DropboxIE -from .dropout import ( - DropoutSeasonIE, - DropoutIE -) from .duoplay import DuoplayIE +from .dvtv import DVTVIE from .dw import ( DWIE, DWArticleIE, ) -from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE +from .eagleplatform import ( + ClipYouEmbedIE, + EaglePlatformIE, +) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( @@ -563,8 +598,8 @@ from .eporner import EpornerIE from .erocast import ErocastIE from .eroprofile import ( - EroProfileIE, EroProfileAlbumIE, + EroProfileIE, ) from .err import ERRJupiterIE from .ertgr import ( @@ -574,31 +609,33 @@ ) from .espn import ( ESPNIE, - WatchESPNIE, ESPNArticleIE, - FiveThirtyEightIE, ESPNCricInfoIE, + FiveThirtyEightIE, + WatchESPNIE, ) from .ettutv import EttuTvIE -from .europa import EuropaIE, EuroParlWebstreamIE +from .europa import ( + EuropaIE, + EuroParlWebstreamIE, +) from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE from .expressen import ExpressenIE from .eyedotv import EyedoTVIE from .facebook import ( + FacebookAdsIE, FacebookIE, FacebookPluginsVideoIE, FacebookRedirectURLIE, FacebookReelIE, - FacebookAdsIE, +) +from .fancode import ( + FancodeLiveIE, + FancodeVodIE, ) from .fathom import FathomIE -from .fancode import ( - FancodeVodIE, - FancodeLiveIE -) - from .faz import FazIE from .fc2 import ( FC2IE, @@ -608,8 +645,8 @@ from .fczenit import FczenitIE from .fifa import FifaIE from .filmon import ( - FilmOnIE, FilmOnChannelIE, + FilmOnIE, ) from .filmweb import FilmwebIE from .firsttv import FirstTVIE @@ -617,17 +654,17 @@ from .flextv import FlexTVIE from .flickr import FlickrIE from .floatplane import ( - FloatplaneIE, FloatplaneChannelIE, + FloatplaneIE, ) from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import ( FourTubeIE, - PornTubeIE, - PornerBrosIE, FuxIE, + PornerBrosIE, + PornTubeIE, ) from .fox import FOXIE from .fox9 import ( @@ -635,8 +672,8 @@ FOX9NewsIE, ) from .foxnews import ( - FoxNewsIE, FoxNewsArticleIE, + FoxNewsIE, FoxNewsVideoIE, ) from .foxsports import FoxSportsIE @@ -644,20 +681,20 @@ from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, - FranceTVSiteIE, FranceTVInfoIE, + FranceTVSiteIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .frontendmasters import ( - FrontendMastersIE, - FrontendMastersLessonIE, - FrontendMastersCourseIE -) from .freetv import ( FreeTvIE, FreeTvMoviesIE, ) +from .frontendmasters import ( + FrontendMastersCourseIE, + FrontendMastersIE, + FrontendMastersLessonIE, +) from .fujitv import FujiTVFODPlus7IE from .funimation import ( FunimationIE, @@ -668,32 +705,37 @@ from .funker530 import Funker530IE from .fuyintv import FuyinTVIE from .gab import ( - GabTVIE, GabIE, + GabTVIE, ) from .gaia import GaiaIE from .gamejolt import ( - GameJoltIE, - GameJoltUserIE, + GameJoltCommunityIE, GameJoltGameIE, GameJoltGameSoundtrackIE, - GameJoltCommunityIE, + GameJoltIE, GameJoltSearchIE, + GameJoltUserIE, ) from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE from .gazeta import GazetaIE +from .gbnews import GBNewsIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .genericembeds import ( + HTML5MediaEmbedIE, + QuotedHTMLIE, +) from .genius import ( GeniusIE, GeniusLyricsIE, ) from .getcourseru import ( + GetCourseRuIE, GetCourseRuPlayerIE, - GetCourseRuIE ) from .gettr import ( GettrIE, @@ -702,41 +744,45 @@ from .giantbomb import GiantBombIE from .glide import GlideIE from .globalplayer import ( + GlobalPlayerAudioEpisodeIE, + GlobalPlayerAudioIE, GlobalPlayerLiveIE, GlobalPlayerLivePlaylistIE, - GlobalPlayerAudioIE, - GlobalPlayerAudioEpisodeIE, - GlobalPlayerVideoIE + GlobalPlayerVideoIE, ) from .globo import ( - GloboIE, GloboArticleIE, + GloboIE, +) +from .glomex import ( + GlomexEmbedIE, + GlomexIE, ) from .gmanetwork import GMANetworkVideoIE from .go import GoIE -from .godtube import GodTubeIE from .godresource import GodResourceIE +from .godtube import GodTubeIE from .gofile import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE from .googledrive import ( - GoogleDriveIE, GoogleDriveFolderIE, + GoogleDriveIE, ) from .googlepodcasts import ( - GooglePodcastsIE, GooglePodcastsFeedIE, + GooglePodcastsIE, ) from .googlesearch import GoogleSearchIE -from .gopro import GoProIE from .goplay import GoPlayIE +from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE from .gronkh import ( - GronkhIE, GronkhFeedIE, - GronkhVodsIE + GronkhIE, + GronkhVodsIE, ) from .groupon import GrouponIE from .harpodeon import HarpodeonIE @@ -745,10 +791,10 @@ from .heise import HeiseIE from .hellporno import HellPornoIE from .hgtv import HGTVComShowIE -from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitrecord import HitRecordIE +from .hketv import HKETVIE from .hollywoodreporter import ( HollywoodReporterIE, HollywoodReporterPlaylistIE, @@ -757,8 +803,8 @@ from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, - HotStarPrefixIE, HotStarPlaylistIE, + HotStarPrefixIE, HotStarSeasonIE, HotStarSeriesIE, ) @@ -769,34 +815,30 @@ HRTiPlaylistIE, ) from .hse import ( - HSEShowIE, HSEProductIE, -) -from .genericembeds import ( - HTML5MediaEmbedIE, - QuotedHTMLIE, + HSEShowIE, ) from .huajiao import HuajiaoIE -from .huya import HuyaLiveIE from .huffpost import HuffPostIE from .hungama import ( + HungamaAlbumPlaylistIE, HungamaIE, HungamaSongIE, - HungamaAlbumPlaylistIE, ) +from .huya import HuyaLiveIE from .hypem import HypemIE from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( - IchinanaLiveIE, IchinanaLiveClipIE, + IchinanaLiveIE, ) from .idolplus import IdolPlusIE from .ign import ( IGNIE, - IGNVideoIE, IGNArticleIE, + IGNVideoIE, ) from .iheart import ( IHeartRadioIE, @@ -806,12 +848,12 @@ from .iltalehti import IltalehtiIE from .imdb import ( ImdbIE, - ImdbListIE + ImdbListIE, ) from .imgur import ( - ImgurIE, ImgurAlbumIE, ImgurGalleryIE, + ImgurIE, ) from .ina import InaIE from .inc import IncIE @@ -820,20 +862,20 @@ from .instagram import ( InstagramIE, InstagramIOSIE, - InstagramUserIE, - InstagramTagIE, InstagramStoryIE, + InstagramTagIE, + InstagramUserIE, ) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import ( + IPrimaCNNIE, IPrimaIE, - IPrimaCNNIE ) from .iqiyi import ( - IqiyiIE, + IqAlbumIE, IqIE, - IqAlbumIE + IqiyiIE, ) from .islamchannel import ( IslamChannelIE, @@ -841,16 +883,16 @@ ) from .israelnationalnews import IsraelNationalNewsIE from .itprotv import ( + ITProTVCourseIE, ITProTVIE, - ITProTVCourseIE ) from .itv import ( - ITVIE, ITVBTCCIE, + ITVIE, ) from .ivi import ( + IviCompilationIE, IviIE, - IviCompilationIE ) from .ivideon import IvideonIE from .iwara import ( @@ -861,25 +903,29 @@ from .ixigua import IxiguaIE from .izlesene import IzleseneIE from .jamendo import ( - JamendoIE, JamendoAlbumIE, + JamendoIE, ) from .japandiet import ( + SangiinIE, + SangiinInstructionIE, ShugiinItvLiveIE, ShugiinItvLiveRoomIE, ShugiinItvVodIE, - SangiinInstructionIE, - SangiinIE, ) from .jeuxvideo import JeuxVideoIE +from .jiocinema import ( + JioCinemaIE, + JioCinemaSeriesIE, +) from .jiosaavn import ( - JioSaavnSongIE, JioSaavnAlbumIE, JioSaavnPlaylistIE, + JioSaavnSongIE, ) -from .jove import JoveIE from .joj import JojIE from .joqrag import JoqrAgIE +from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( JTBCIE, @@ -906,17 +952,17 @@ from .kommunetv import KommunetvIE from .kompas import KompasVideoIE from .koo import KooIE -from .kth import KTHIE from .krasview import KrasViewIE +from .kth import KTHIE from .ku6 import Ku6IE from .kukululive import KukuluLiveIE from .kuwo import ( - KuwoIE, KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, KuwoCategoryIE, + KuwoChartIE, + KuwoIE, KuwoMvIE, + KuwoSingerIE, ) from .la7 import ( LA7IE, @@ -936,14 +982,14 @@ ) from .lci import LCIIE from .lcp import ( - LcpPlayIE, LcpIE, + LcpPlayIE, ) from .lecture2go import Lecture2GoIE from .lecturio import ( - LecturioIE, LecturioCourseIE, LecturioDeCourseIE, + LecturioIE, ) from .leeco import ( LeIE, @@ -960,22 +1006,22 @@ from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( - LifeNewsIE, LifeEmbedIE, + LifeNewsIE, ) from .likee import ( LikeeIE, - LikeeUserIE + LikeeUserIE, ) from .limelight import ( - LimelightMediaIE, LimelightChannelIE, LimelightChannelListIE, + LimelightMediaIE, ) from .linkedin import ( LinkedInIE, - LinkedInLearningIE, LinkedInLearningCourseIE, + LinkedInLearningIE, ) from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE @@ -992,25 +1038,23 @@ LnkIE, ) from .loom import ( - LoomIE, LoomFolderIE, + LoomIE, ) from .lovehomeporn import LoveHomePornIE from .lrt import ( LRTVODIE, - LRTStreamIE + LRTStreamIE, ) from .lsm import ( LSMLREmbedIE, LSMLTVEmbedIE, - LSMReplayIE -) -from .lumni import ( - LumniIE + LSMReplayIE, ) +from .lumni import LumniIE from .lynda import ( + LyndaCourseIE, LyndaIE, - LyndaCourseIE ) from .maariv import MaarivIE from .magellantv import MagellanTVIE @@ -1022,13 +1066,13 @@ ) from .mainstreaming import MainStreamingIE from .mangomolo import ( - MangomoloVideoIE, MangomoloLiveIE, + MangomoloVideoIE, ) from .manoto import ( ManotoTVIE, - ManotoTVShowIE, ManotoTVLiveIE, + ManotoTVShowIE, ) from .manyvids import ManyVidsIE from .maoritv import MaoriTVIE @@ -1044,13 +1088,14 @@ from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE +from .medialaan import MedialaanIE from .mediaset import ( MediasetIE, MediasetShowIE, ) from .mediasite import ( - MediasiteIE, MediasiteCatalogIE, + MediasiteIE, MediasiteNamedCatalogIE, ) from .mediastream import ( @@ -1060,26 +1105,30 @@ from .mediaworksnz import MediaWorksNZVODIE from .medici import MediciIE from .megaphone import MegaphoneIE +from .megatvcom import ( + MegaTVComEmbedIE, + MegaTVComIE, +) from .meipai import MeipaiIE from .melonvod import MelonVODIE from .metacritic import MetacriticIE from .mgtv import MGTVIE +from .microsoftembed import MicrosoftEmbedIE from .microsoftstream import MicrosoftStreamIE from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, + MicrosoftVirtualAcademyIE, ) -from .microsoftembed import MicrosoftEmbedIE from .mildom import ( - MildomIE, - MildomVodIE, MildomClipIE, + MildomIE, MildomUserVodIE, + MildomVodIE, ) from .minds import ( - MindsIE, MindsChannelIE, MindsGroupIE, + MindsIE, ) from .minoto import MinotoIE from .mirrativ import ( @@ -1087,31 +1136,34 @@ MirrativUserIE, ) from .mirrorcouk import MirrorCoUKIE -from .mit import TechTVMITIE, OCWMITIE +from .mit import ( + OCWMITIE, + TechTVMITIE, +) from .mitele import MiTeleIE from .mixch import ( - MixchIE, MixchArchiveIE, + MixchIE, ) from .mixcloud import ( MixcloudIE, - MixcloudUserIE, MixcloudPlaylistIE, + MixcloudUserIE, ) from .mlb import ( MLBIE, - MLBVideoIE, MLBTVIE, MLBArticleIE, + MLBVideoIE, ) from .mlssoccer import MLSSoccerIE from .mocha import MochaVideoIE from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .motherless import ( - MotherlessIE, - MotherlessGroupIE, MotherlessGalleryIE, + MotherlessGroupIE, + MotherlessIE, MotherlessUploaderIE, ) from .motorsport import MotorsportIE @@ -1121,23 +1173,26 @@ from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( - MTVIE, - MTVVideoIE, - MTVServicesEmbeddedIE, MTVDEIE, - MTVJapanIE, + MTVIE, MTVItaliaIE, MTVItaliaProgrammaIE, + MTVJapanIE, + MTVServicesEmbeddedIE, + MTVVideoIE, ) from .muenchentv import MuenchenTVIE -from .murrtube import MurrtubeIE, MurrtubeUserIE +from .murrtube import ( + MurrtubeIE, + MurrtubeUserIE, +) from .museai import MuseAIIE from .musescore import MuseScoreIE from .musicdex import ( - MusicdexSongIE, MusicdexAlbumIE, MusicdexArtistIE, MusicdexPlaylistIE, + MusicdexSongIE, ) from .mx3 import ( Mx3IE, @@ -1148,7 +1203,10 @@ MxplayerIE, MxplayerShowIE, ) -from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspace import ( + MySpaceAlbumIE, + MySpaceIE, +) from .myspass import MySpassIE from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE @@ -1162,8 +1220,8 @@ NateProgramIE, ) from .nationalgeographic import ( - NationalGeographicVideoIE, NationalGeographicTVIE, + NationalGeographicVideoIE, ) from .naver import ( NaverIE, @@ -1171,12 +1229,12 @@ NaverNowIE, ) from .nba import ( - NBAWatchEmbedIE, - NBAWatchIE, - NBAWatchCollectionIE, - NBAEmbedIE, NBAIE, NBAChannelIE, + NBAEmbedIE, + NBAWatchCollectionIE, + NBAWatchEmbedIE, + NBAWatchIE, ) from .nbc import ( NBCIE, @@ -1190,35 +1248,35 @@ ) from .ndr import ( NDRIE, - NJoyIE, NDREmbedBaseIE, NDREmbedIE, NJoyEmbedIE, + NJoyIE, ) from .ndtv import NDTVIE from .nebula import ( - NebulaIE, - NebulaClassIE, - NebulaSubscriptionsIE, NebulaChannelIE, + NebulaClassIE, + NebulaIE, + NebulaSubscriptionsIE, ) from .nekohacker import NekoHackerIE from .nerdcubed import NerdCubedFeedIE -from .netzkino import NetzkinoIE from .neteasemusic import ( - NetEaseMusicIE, NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, + NetEaseMusicDjRadioIE, + NetEaseMusicIE, NetEaseMusicListIE, NetEaseMusicMvIE, NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, + NetEaseMusicSingerIE, ) from .netverse import ( NetverseIE, NetversePlaylistIE, NetverseSearchIE, ) +from .netzkino import NetzkinoIE from .newgrounds import ( NewgroundsIE, NewgroundsPlaylistIE, @@ -1227,14 +1285,14 @@ from .newspicks import NewsPicksIE from .newsy import NewsyIE from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, AppleDailyIE, + NextMediaActionNewsIE, + NextMediaIE, NextTVIE, ) from .nexx import ( - NexxIE, NexxEmbedIE, + NexxIE, ) from .nfb import ( NFBIE, @@ -1248,43 +1306,43 @@ NFLPlusReplayIE, ) from .nhk import ( - NhkVodIE, - NhkVodProgramIE, NhkForSchoolBangumiIE, - NhkForSchoolSubjectIE, NhkForSchoolProgramListIE, + NhkForSchoolSubjectIE, NhkRadioNewsPageIE, NhkRadiruIE, NhkRadiruLiveIE, + NhkVodIE, + NhkVodProgramIE, ) from .nhl import NHLIE from .nick import ( - NickIE, NickBrIE, NickDeIE, + NickIE, NickRuIE, ) from .niconico import ( - NiconicoIE, - NiconicoPlaylistIE, - NiconicoUserIE, - NiconicoSeriesIE, NiconicoHistoryIE, + NiconicoIE, + NiconicoLiveIE, + NiconicoPlaylistIE, + NiconicoSeriesIE, + NiconicoUserIE, NicovideoSearchDateIE, NicovideoSearchIE, NicovideoSearchURLIE, NicovideoTagURLIE, - NiconicoLiveIE, +) +from .niconicochannelplus import ( + NiconicoChannelPlusChannelLivesIE, + NiconicoChannelPlusChannelVideosIE, + NiconicoChannelPlusIE, ) from .ninaprotocol import NinaProtocolIE from .ninecninemedia import ( - NineCNineMediaIE, CPTwentyFourIE, -) -from .niconicochannelplus import ( - NiconicoChannelPlusIE, - NiconicoChannelPlusChannelVideosIE, - NiconicoChannelPlusChannelLivesIE, + NineCNineMediaIE, ) from .ninegag import NineGagIE from .ninenews import NineNewsIE @@ -1309,24 +1367,24 @@ ) from .noz import NozIE from .npo import ( - AndereTijdenIE, NPOIE, - NPOLiveIE, - NPORadioIE, - NPORadioFragmentIE, - SchoolTVIE, - HetKlokhuisIE, VPROIE, WNLIE, + AndereTijdenIE, + HetKlokhuisIE, + NPOLiveIE, + NPORadioFragmentIE, + NPORadioIE, + SchoolTVIE, ) from .npr import NprIE from .nrk import ( NRKIE, - NRKPlaylistIE, - NRKSkoleIE, NRKTVIE, - NRKTVDirekteIE, + NRKPlaylistIE, NRKRadioPodkastIE, + NRKSkoleIE, + NRKTVDirekteIE, NRKTVEpisodeIE, NRKTVEpisodesIE, NRKTVSeasonIE, @@ -1338,18 +1396,18 @@ from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nubilesporn import NubilesPornIE +from .nuum import ( + NuumLiveIE, + NuumMediaIE, + NuumTabIE, +) +from .nuvid import NuvidIE from .nytimes import ( - NYTimesIE, NYTimesArticleIE, NYTimesCookingIE, NYTimesCookingRecipeIE, + NYTimesIE, ) -from .nuum import ( - NuumLiveIE, - NuumTabIE, - NuumMediaIE, -) -from .nuvid import NuvidIE from .nzherald import NZHeraldIE from .nzonscreen import NZOnScreenIE from .nzz import NZZIE @@ -1357,7 +1415,7 @@ from .odnoklassniki import OdnoklassnikiIE from .oftv import ( OfTVIE, - OfTVPlaylistIE + OfTVPlaylistIE, ) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE @@ -1370,8 +1428,8 @@ from .onenewsnz import OneNewsNZIE from .oneplace import OnePlacePodcastIE from .onet import ( - OnetIE, OnetChannelIE, + OnetIE, OnetMVPIE, OnetPlIE, ) @@ -1381,34 +1439,33 @@ OpencastPlaylistIE, ) from .openrec import ( - OpenRecIE, OpenRecCaptureIE, + OpenRecIE, OpenRecMovieIE, ) from .ora import OraTVIE from .orf import ( - ORFTVthekIE, - ORFFM4StoryIE, - ORFONIE, - ORFRadioIE, - ORFPodcastIE, ORFIPTVIE, + ORFONIE, + ORFFM4StoryIE, + ORFPodcastIE, + ORFRadioIE, ) from .outsidetv import OutsideTVIE from .owncloud import OwnCloudIE from .packtpub import ( - PacktPubIE, PacktPubCourseIE, + PacktPubIE, ) from .palcomp3 import ( - PalcoMP3IE, PalcoMP3ArtistIE, + PalcoMP3IE, PalcoMP3VideoIE, ) from .panopto import ( PanoptoIE, PanoptoListIE, - PanoptoPlaylistIE + PanoptoPlaylistIE, ) from .paramountplus import ( ParamountPlusIE, @@ -1417,12 +1474,18 @@ from .parler import ParlerIE from .parlview import ParlviewIE from .patreon import ( + PatreonCampaignIE, PatreonIE, - PatreonCampaignIE ) -from .pbs import PBSIE, PBSKidsIE +from .pbs import ( + PBSIE, + PBSKidsIE, +) from .pearvideo import PearVideoIE -from .peekvids import PeekVidsIE, PlayVidsIE +from .peekvids import ( + PeekVidsIE, + PlayVidsIE, +) from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, @@ -1430,7 +1493,7 @@ from .peertv import PeerTVIE from .peloton import ( PelotonIE, - PelotonLiveIE + PelotonLiveIE, ) from .performgroup import PerformGroupIE from .periscope import ( @@ -1450,8 +1513,8 @@ from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pinterest import ( - PinterestIE, PinterestCollectionIE, + PinterestIE, ) from .pixivsketch import ( PixivSketchIE, @@ -1460,19 +1523,22 @@ from .pladform import PladformIE from .planetmarathi import PlanetMarathiIE from .platzi import ( - PlatziIE, PlatziCourseIE, + PlatziIE, ) from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE from .playwire import PlaywireIE -from .plutotv import PlutoTVIE from .pluralsight import ( - PluralsightIE, PluralsightCourseIE, + PluralsightIE, +) +from .plutotv import PlutoTVIE +from .podbayfm import ( + PodbayFMChannelIE, + PodbayFMIE, ) -from .podbayfm import PodbayFMIE, PodbayFMChannelIE from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( @@ -1480,15 +1546,15 @@ PokemonWatchIE, ) from .pokergo import ( - PokerGoIE, PokerGoCollectionIE, + PokerGoIE, ) from .polsatgo import PolsatGoIE from .polskieradio import ( - PolskieRadioIE, - PolskieRadioLegacyIE, PolskieRadioAuditionIE, PolskieRadioCategoryIE, + PolskieRadioIE, + PolskieRadioLegacyIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, PolskieRadioPodcastListIE, @@ -1499,57 +1565,62 @@ from .pornflip import PornFlipIE from .pornhub import ( PornHubIE, - PornHubUserIE, - PornHubPlaylistIE, PornHubPagedVideoListIE, + PornHubPlaylistIE, + PornHubUserIE, PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE -from .puhutv import ( - PuhuTVIE, - PuhuTVSerieIE, -) from .pr0gramm import Pr0grammIE -from .prankcast import PrankCastIE, PrankCastPostIE +from .prankcast import ( + PrankCastIE, + PrankCastPostIE, +) from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE from .prosiebensat1 import ProSiebenSat1IE from .prx import ( - PRXStoryIE, - PRXSeriesIE, PRXAccountIE, + PRXSeriesIE, + PRXSeriesSearchIE, PRXStoriesSearchIE, - PRXSeriesSearchIE + PRXStoryIE, +) +from .puhutv import ( + PuhuTVIE, + PuhuTVSerieIE, ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qdance import QDanceIE from .qingting import QingTingIE from .qqmusic import ( - QQMusicIE, - QQMusicSingerIE, QQMusicAlbumIE, - QQMusicToplistIE, + QQMusicIE, QQMusicPlaylistIE, + QQMusicSingerIE, + QQMusicToplistIE, ) from .r7 import ( R7IE, R7ArticleIE, ) -from .radiko import RadikoIE, RadikoRadioIE +from .radiko import ( + RadikoIE, + RadikoRadioIE, +) from .radiocanada import ( - RadioCanadaIE, RadioCanadaAudioVideoIE, + RadioCanadaIE, ) from .radiocomercial import ( RadioComercialIE, RadioComercialPlaylistIE, ) from .radiode import RadioDeIE -from .radiojavan import RadioJavanIE from .radiofrance import ( FranceCultureIE, RadioFranceIE, @@ -1558,35 +1629,36 @@ RadioFranceProfileIE, RadioFranceProgramScheduleIE, ) -from .radiozet import RadioZetPodcastIE +from .radiojavan import RadioJavanIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) +from .radiozet import RadioZetPodcastIE from .radlive import ( - RadLiveIE, RadLiveChannelIE, + RadLiveIE, RadLiveSeasonIE, ) from .rai import ( - RaiIE, RaiCulturaIE, + RaiIE, + RaiNewsIE, RaiPlayIE, RaiPlayLiveIE, RaiPlayPlaylistIE, RaiPlaySoundIE, RaiPlaySoundLiveIE, RaiPlaySoundPlaylistIE, - RaiNewsIE, RaiSudtirolIE, ) from .raywenderlich import ( - RayWenderlichIE, RayWenderlichCourseIE, + RayWenderlichIE, ) from .rbgtum import ( - RbgTumIE, RbgTumCourseIE, + RbgTumIE, RbgTumNewCourseIE, ) from .rcs import ( @@ -1600,12 +1672,15 @@ RCTIPlusTVIE, ) from .rds import RDSIE -from .redbee import ParliamentLiveUKIE, RTBFIE +from .redbee import ( + RTBFIE, + ParliamentLiveUKIE, +) from .redbulltv import ( - RedBullTVIE, RedBullEmbedIE, - RedBullTVRrnContentIE, RedBullIE, + RedBullTVIE, + RedBullTVRrnContentIE, ) from .reddit import RedditIE from .redge import RedCDNLivxIE @@ -1625,107 +1700,100 @@ from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( - RinseFMIE, RinseFMArtistPlaylistIE, + RinseFMIE, ) from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( - RokfinIE, - RokfinStackIE, RokfinChannelIE, + RokfinIE, RokfinSearchIE, + RokfinStackIE, +) +from .roosterteeth import ( + RoosterTeethIE, + RoosterTeethSeriesIE, ) -from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE from .rozhlas import ( + MujRozhlasIE, RozhlasIE, RozhlasVltavaIE, - MujRozhlasIE, ) -from .rte import RteIE, RteRadioIE +from .rte import ( + RteIE, + RteRadioIE, +) +from .rtl2 import RTL2IE from .rtlnl import ( - RtlNlIE, - RTLLuTeleVODIE, RTLLuArticleIE, RTLLuLiveIE, RTLLuRadioIE, + RTLLuTeleVODIE, + RtlNlIE, ) -from .rtl2 import RTL2IE from .rtnews import ( - RTNewsIE, RTDocumentryIE, RTDocumentryPlaylistIE, + RTNewsIE, RuptlyIE, ) from .rtp import RTPIE from .rtrfm import RTRFMIE from .rts import RTSIE from .rtvcplay import ( - RTVCPlayIE, - RTVCPlayEmbedIE, RTVCKalturaIE, + RTVCPlayEmbedIE, + RTVCPlayIE, ) from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, - RTVELiveIE, RTVEInfantilIE, + RTVELiveIE, RTVETelevisionIE, ) from .rtvs import RTVSIE from .rtvslo import RTVSLOIE +from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( + RumbleChannelIE, RumbleEmbedIE, RumbleIE, - RumbleChannelIE, ) -from .rudovideo import RudoVideoIE from .rutube import ( - RutubeIE, RutubeChannelIE, RutubeEmbedIE, + RutubeIE, RutubeMovieIE, RutubePersonIE, RutubePlaylistIE, RutubeTagsIE, ) -from .glomex import ( - GlomexIE, - GlomexEmbedIE, -) -from .megatvcom import ( - MegaTVComIE, - MegaTVComEmbedIE, -) -from .antenna import ( - AntennaGrWatchIE, - Ant1NewsGrArticleIE, - Ant1NewsGrEmbedIE, -) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, - RuvSpilaIE + RuvSpilaIE, ) from .s4c import ( S4CIE, - S4CSeriesIE + S4CSeriesIE, ) from .safari import ( - SafariIE, SafariApiIE, SafariCourseIE, + SafariIE, ) from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE from .sbs import SBSIE from .sbscokr import ( - SBSCoKrIE, SBSCoKrAllvodProgramIE, + SBSCoKrIE, SBSCoKrProgramsVodIE, ) from .screen9 import Screen9IE @@ -1733,24 +1801,27 @@ from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ( - ScrippsNetworksWatchIE, ScrippsNetworksIE, + ScrippsNetworksWatchIE, ) +from .scrolller import ScrolllerIE from .scte import ( SCTEIE, SCTECourseIE, ) -from .scrolller import ScrolllerIE from .sejmpl import SejmIE from .senalcolombia import SenalColombiaLiveIE -from .senategov import SenateISVPIE, SenateGovIE +from .senategov import ( + SenateGovIE, + SenateISVPIE, +) from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE from .seznamzpravy import ( - SeznamZpravyIE, SeznamZpravyArticleIE, + SeznamZpravyIE, ) from .shahid import ( ShahidIE, @@ -1758,38 +1829,38 @@ ) from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE -from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE +from .sibnet import SibnetEmbedIE from .simplecast import ( - SimplecastIE, SimplecastEpisodeIE, + SimplecastIE, SimplecastPodcastIE, ) from .sina import SinaIE from .sixplay import SixPlayIE from .skeb import SkebIE -from .skyit import ( - SkyItPlayerIE, - SkyItVideoIE, - SkyItVideoLiveIE, - SkyItIE, - SkyItArteIE, - CieloTVItIE, - TV8ItIE, -) -from .skylinewebcams import SkylineWebcamsIE -from .skynewsarabia import ( - SkyNewsArabiaIE, - SkyNewsArabiaArticleIE, -) -from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, SkyNewsStoryIE, SkySportsIE, SkySportsNewsIE, ) +from .skyit import ( + CieloTVItIE, + SkyItArteIE, + SkyItIE, + SkyItPlayerIE, + SkyItVideoIE, + SkyItVideoLiveIE, + TV8ItIE, +) +from .skylinewebcams import SkylineWebcamsIE +from .skynewsarabia import ( + SkyNewsArabiaArticleIE, + SkyNewsArabiaIE, +) +from .skynewsau import SkyNewsAUIE from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE @@ -1806,29 +1877,29 @@ from .soundcloud import ( SoundcloudEmbedIE, SoundcloudIE, - SoundcloudSetIE, + SoundcloudPlaylistIE, SoundcloudRelatedIE, + SoundcloudSearchIE, + SoundcloudSetIE, + SoundcloudTrackStationIE, SoundcloudUserIE, SoundcloudUserPermalinkIE, - SoundcloudTrackStationIE, - SoundcloudPlaylistIE, - SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, - SoundgasmProfileIE + SoundgasmProfileIE, ) from .southpark import ( - SouthParkIE, SouthParkDeIE, SouthParkDkIE, SouthParkEsIE, + SouthParkIE, SouthParkLatIE, - SouthParkNlIE + SouthParkNlIE, ) from .sovietscloset import ( SovietsClosetIE, - SovietsClosetPlaylistIE + SovietsClosetPlaylistIE, ) from .spankbang import ( SpankBangIE, @@ -1839,12 +1910,6 @@ BellatorIE, ParamountNetworkIE, ) -from .stageplus import StagePlusVODConcertIE -from .startrek import StarTrekIE -from .stitcher import ( - StitcherIE, - StitcherShowIE, -) from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE @@ -1868,19 +1933,25 @@ from .stacommu import ( StacommuLiveIE, StacommuVODIE, - TheaterComplexTownVODIE, TheaterComplexTownPPVIE, + TheaterComplexTownVODIE, ) +from .stageplus import StagePlusVODConcertIE from .stanfordoc import StanfordOpenClassroomIE +from .startrek import StarTrekIE from .startv import StarTVIE from .steam import ( - SteamIE, SteamCommunityBroadcastIE, + SteamIE, +) +from .stitcher import ( + StitcherIE, + StitcherShowIE, ) from .storyfire import ( StoryFireIE, - StoryFireUserIE, StoryFireSeriesIE, + StoryFireUserIE, ) from .streamable import StreamableIE from .streamcz import StreamCZIE @@ -1901,20 +1972,26 @@ SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .syvdk import SYVDKIE from .syfy import SyfyIE +from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE +from .taptap import ( + TapTapAppIE, + TapTapAppIntlIE, + TapTapMomentIE, + TapTapPostIntlIE, +) from .tass import TassIE from .tbs import TBSIE from .tbsjp import ( TBSJPEpisodeIE, - TBSJPProgramIE, TBSJPPlaylistIE, + TBSJPProgramIE, ) from .teachable import ( - TeachableIE, TeachableCourseIE, + TeachableIE, ) from .teachertube import ( TeacherTubeIE, @@ -1922,8 +1999,8 @@ ) from .teachingchannel import TeachingChannelIE from .teamcoco import ( - TeamcocoIE, ConanClassicIE, + TeamcocoIE, ) from .teamtreehouse import TeamTreeHouseIE from .ted import ( @@ -1942,15 +2019,18 @@ from .telemb import TeleMBIE from .telemundo import TelemundoIE from .telequebec import ( - TeleQuebecIE, - TeleQuebecSquatIE, TeleQuebecEmissionIE, + TeleQuebecIE, TeleQuebecLiveIE, + TeleQuebecSquatIE, TeleQuebecVideoIE, ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE -from .tempo import TempoIE, IVXPlayerIE +from .tempo import ( + IVXPlayerIE, + TempoIE, +) from .tencent import ( IflixEpisodeIE, IflixSeriesIE, @@ -1974,8 +2054,8 @@ from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( - ThePlatformIE, ThePlatformFeedIE, + ThePlatformIE, ) from .thestar import TheStarIE from .thesun import TheSunIE @@ -1987,50 +2067,52 @@ ThisVidMemberIE, ThisVidPlaylistIE, ) +from .threeqsdn import ThreeQSDNIE from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, ) -from .threeqsdn import ThreeQSDNIE from .tiktok import ( - TikTokIE, - TikTokUserIE, - TikTokSoundIE, - TikTokEffectIE, - TikTokTagIE, - TikTokVMIE, - TikTokLiveIE, DouyinIE, + TikTokCollectionIE, + TikTokEffectIE, + TikTokIE, + TikTokLiveIE, + TikTokSoundIE, + TikTokTagIE, + TikTokUserIE, + TikTokVMIE, ) from .tmz import TMZIE from .tnaflix import ( - TNAFlixNetworkEmbedIE, - TNAFlixIE, EMPFlixIE, MovieFapIE, + TNAFlixIE, + TNAFlixNetworkEmbedIE, ) from .toggle import ( - ToggleIE, MeWatchIE, + ToggleIE, ) -from .toggo import ( - ToggoIE, -) +from .toggo import ToggoIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE -from .toypics import ToypicsUserIE, ToypicsIE +from .toypics import ( + ToypicsIE, + ToypicsUserIE, +) from .traileraddict import TrailerAddictIE from .triller import ( TrillerIE, - TrillerUserIE, TrillerShortIE, + TrillerUserIE, ) from .trovo import ( + TrovoChannelClipIE, + TrovoChannelVodIE, TrovoIE, TrovoVodIE, - TrovoChannelVodIE, - TrovoChannelClipIE, ) from .trtcocuk import TrtCocukVideoIE from .trtworld import TrtWorldIE @@ -2039,26 +2121,26 @@ from .truth import TruthIE from .trutv import TruTVIE from .tube8 import Tube8IE -from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE +from .tubetugraz import ( + TubeTuGrazIE, + TubeTuGrazSeriesIE, +) from .tubitv import ( TubiTvIE, TubiTvShowIE, ) from .tumblr import TumblrIE from .tunein import ( - TuneInStationIE, - TuneInPodcastIE, TuneInPodcastEpisodeIE, + TuneInPodcastIE, TuneInShortenerIE, + TuneInStationIE, ) from .tv2 import ( TV2IE, - TV2ArticleIE, KatsomoIE, MTVUutisetArticleIE, -) -from .tv24ua import ( - TV24UAVideoIE, + TV2ArticleIE, ) from .tv2dk import ( TV2DKIE, @@ -2071,16 +2153,17 @@ from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tv5unis import ( - TV5UnisVideoIE, TV5UnisIE, + TV5UnisVideoIE, ) +from .tv24ua import TV24UAVideoIE from .tva import ( TVAIE, QubIE, ) from .tvanouvelles import ( - TVANouvellesIE, TVANouvellesArticleIE, + TVANouvellesIE, ) from .tvc import ( TVCIE, @@ -2093,19 +2176,19 @@ from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvopengr import ( - TVOpenGrWatchIE, TVOpenGrEmbedIE, + TVOpenGrWatchIE, ) from .tvp import ( - TVPEmbedIE, TVPIE, + TVPEmbedIE, TVPStreamIE, TVPVODSeriesIE, TVPVODVideoIE, ) from .tvplay import ( - TVPlayIE, TVPlayHomeIE, + TVPlayIE, ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE @@ -2117,29 +2200,29 @@ TwitCastingUserIE, ) from .twitch import ( - TwitchVodIE, + TwitchClipsIE, TwitchCollectionIE, - TwitchVideosIE, + TwitchStreamIE, TwitchVideosClipsIE, TwitchVideosCollectionsIE, - TwitchStreamIE, - TwitchClipsIE, + TwitchVideosIE, + TwitchVodIE, ) from .twitter import ( - TwitterCardIE, - TwitterIE, TwitterAmplifyIE, TwitterBroadcastIE, - TwitterSpacesIE, + TwitterCardIE, + TwitterIE, TwitterShortenerIE, + TwitterSpacesIE, ) from .txxx import ( - TxxxIE, PornTopIE, + TxxxIE, ) from .udemy import ( + UdemyCourseIE, UdemyIE, - UdemyCourseIE ) from .udn import UDNEmbedIE from .ufctv import ( @@ -2148,16 +2231,13 @@ ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE -from .digiteka import DigitekaIE -from .dlive import ( - DLiveVODIE, - DLiveStreamIE, -) -from .drooble import DroobleIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE -from .unsupported import KnownDRMIE, KnownPiracyIE +from .unsupported import ( + KnownDRMIE, + KnownPiracyIE, +) from .uol import UOLIE from .uplynk import ( UplynkIE, @@ -2167,10 +2247,13 @@ from .urplay import URPlayIE from .usanetwork import USANetworkIE from .usatoday import USATodayIE -from .ustream import UstreamIE, UstreamChannelIE +from .ustream import ( + UstreamChannelIE, + UstreamIE, +) from .ustudio import ( - UstudioIE, UstudioEmbedIE, + UstudioIE, ) from .utreon import UtreonIE from .varzesh3 import Varzesh3IE @@ -2178,7 +2261,7 @@ from .veo import VeoIE from .veoh import ( VeohIE, - VeohUserIE + VeohUserIE, ) from .vesti import VestiIE from .vevo import ( @@ -2186,14 +2269,14 @@ VevoPlaylistIE, ) from .vgtv import ( + VGTVIE, BTArticleIE, BTVestlendingenIE, - VGTVIE, ) from .vh1 import VH1IE from .vice import ( - ViceIE, ViceArticleIE, + ViceIE, ViceShowIE, ) from .viddler import ViddlerIE @@ -2205,42 +2288,46 @@ from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videoken import ( + VideoKenCategoryIE, VideoKenIE, VideoKenPlayerIE, VideoKenPlaylistIE, - VideoKenCategoryIE, VideoKenTopicIE, ) from .videomore import ( VideomoreIE, - VideomoreVideoIE, VideomoreSeasonIE, + VideomoreVideoIE, ) from .videopress import VideoPressIE from .vidio import ( VidioIE, + VidioLiveIE, VidioPremierIE, - VidioLiveIE ) from .vidlii import VidLiiIE from .vidly import VidlyIE from .viewlift import ( - ViewLiftIE, ViewLiftEmbedIE, + ViewLiftIE, ) from .viidea import ViideaIE +from .viki import ( + VikiChannelIE, + VikiIE, +) from .vimeo import ( - VimeoIE, + VHXEmbedIE, VimeoAlbumIE, VimeoChannelIE, VimeoGroupsIE, + VimeoIE, VimeoLikesIE, VimeoOndemandIE, VimeoProIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, - VHXEmbedIE, ) from .vimm import ( VimmIE, @@ -2250,50 +2337,41 @@ VineIE, VineUserIE, ) -from .viki import ( - VikiIE, - VikiChannelIE, -) from .viously import ViouslyIE from .viqeo import ViqeoIE from .viu import ( ViuIE, - ViuPlaylistIE, ViuOTTIE, ViuOTTIndonesiaIE, + ViuPlaylistIE, ) from .vk import ( VKIE, - VKUserVideosIE, - VKWallPostIE, VKPlayIE, VKPlayLiveIE, + VKUserVideosIE, + VKWallPostIE, ) from .vocaroo import VocarooIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicy import ( - VoicyIE, VoicyChannelIE, + VoicyIE, ) from .volejtv import VolejTVIE -from .voot import ( - VootIE, - VootSeriesIE, -) from .voxmedia import ( - VoxMediaVolumeIE, VoxMediaIE, + VoxMediaVolumeIE, ) from .vrt import ( VRTIE, - VrtNUIE, - KetnetIE, DagelijkseKostIE, + KetnetIE, Radio1BeIE, + VrtNUIE, ) from .vtm import VTMIE -from .medialaan import MedialaanIE from .vuclip import VuClipIE from .vvvvid import ( VVVVIDIE, @@ -2301,20 +2379,20 @@ ) from .walla import WallaIE from .washingtonpost import ( - WashingtonPostIE, WashingtonPostArticleIE, + WashingtonPostIE, ) from .wat import WatIE from .wdr import ( WDRIE, - WDRPageIE, WDRElefantIE, WDRMobileIE, + WDRPageIE, ) from .webcamerapl import WebcameraplIE from .webcaster import ( - WebcasterIE, WebcasterFeedIE, + WebcasterIE, ) from .webofstories import ( WebOfStoriesIE, @@ -2322,42 +2400,42 @@ ) from .weibo import ( WeiboIE, - WeiboVideoIE, WeiboUserIE, + WeiboVideoIE, ) from .weiqitv import WeiqiTVIE from .weverse import ( WeverseIE, - WeverseMediaIE, - WeverseMomentIE, - WeverseLiveTabIE, - WeverseMediaTabIE, WeverseLiveIE, + WeverseLiveTabIE, + WeverseMediaIE, + WeverseMediaTabIE, + WeverseMomentIE, ) from .wevidi import WeVidiIE from .weyyak import WeyyakIE +from .whowatch import WhoWatchIE from .whyp import WhypIE from .wikimedia import WikimediaIE from .wimbledon import WimbledonIE from .wimtv import WimTVIE -from .whowatch import WhoWatchIE from .wistia import ( + WistiaChannelIE, WistiaIE, WistiaPlaylistIE, - WistiaChannelIE, ) from .wordpress import ( - WordpressPlaylistEmbedIE, WordpressMiniAudioPlayerEmbedIE, + WordpressPlaylistEmbedIE, ) from .worldstarhiphop import WorldStarHipHopIE from .wppilot import ( - WPPilotIE, WPPilotChannelsIE, + WPPilotIE, ) from .wrestleuniverse import ( - WrestleUniverseVODIE, WrestleUniversePPVIE, + WrestleUniverseVODIE, ) from .wsj import ( WSJIE, @@ -2365,21 +2443,22 @@ ) from .wwe import WWEIE from .wykop import ( - WykopDigIE, WykopDigCommentIE, - WykopPostIE, + WykopDigIE, WykopPostCommentIE, + WykopPostIE, ) from .xanimu import XanimuIE from .xboxclips import XboxClipsIE from .xhamster import ( - XHamsterIE, XHamsterEmbedIE, + XHamsterIE, XHamsterUserIE, ) +from .xiaohongshu import XiaoHongShuIE from .ximalaya import ( + XimalayaAlbumIE, XimalayaIE, - XimalayaAlbumIE ) from .xinpianchang import XinpianchangIE from .xminus import XMinusIE @@ -2387,27 +2466,27 @@ from .xstream import XstreamIE from .xvideos import ( XVideosIE, - XVideosQuickiesIE + XVideosQuickiesIE, ) from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, - YahooSearchIE, YahooJapanNewsIE, + YahooSearchIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( - YandexMusicTrackIE, YandexMusicAlbumIE, - YandexMusicPlaylistIE, - YandexMusicArtistTracksIE, YandexMusicArtistAlbumsIE, + YandexMusicArtistTracksIE, + YandexMusicPlaylistIE, + YandexMusicTrackIE, ) from .yandexvideo import ( YandexVideoIE, YandexVideoPreviewIE, - ZenYandexIE, ZenYandexChannelIE, + ZenYandexIE, ) from .yapfiles import YapFilesIE from .yappy import ( @@ -2421,24 +2500,34 @@ YoukuShowIE, ) from .younow import ( - YouNowLiveIE, YouNowChannelIE, + YouNowLiveIE, YouNowMomentIE, ) -from .youporn import YouPornIE +from .youporn import ( + YouPornCategoryIE, + YouPornChannelIE, + YouPornCollectionIE, + YouPornIE, + YouPornStarIE, + YouPornTagIE, + YouPornVideosIE, +) from .zaiko import ( - ZaikoIE, ZaikoETicketIE, + ZaikoIE, ) from .zapiks import ZapiksIE from .zattoo import ( BBVTVIE, + EWETVIE, + SAKTVIE, + VTXTVIE, BBVTVLiveIE, BBVTVRecordingsIE, EinsUndEinsTVIE, EinsUndEinsTVLiveIE, EinsUndEinsTVRecordingsIE, - EWETVIE, EWETVLiveIE, EWETVRecordingsIE, GlattvisionTVIE, @@ -2456,13 +2545,11 @@ QuantumTVIE, QuantumTVLiveIE, QuantumTVRecordingsIE, + SAKTVLiveIE, + SAKTVRecordingsIE, SaltTVIE, SaltTVLiveIE, SaltTVRecordingsIE, - SAKTVIE, - SAKTVLiveIE, - SAKTVRecordingsIE, - VTXTVIE, VTXTVLiveIE, VTXTVRecordingsIE, WalyTVIE, @@ -2473,7 +2560,10 @@ ZattooMoviesIE, ZattooRecordingsIE, ) -from .zdf import ZDFIE, ZDFChannelIE +from .zdf import ( + ZDFIE, + ZDFChannelIE, +) from .zee5 import ( Zee5IE, Zee5SeriesIE, @@ -2483,16 +2573,16 @@ from .zetland import ZetlandDKArticleIE from .zhihu import ZhihuIE from .zingmp3 import ( - ZingMp3IE, ZingMp3AlbumIE, ZingMp3ChartHomeIE, - ZingMp3WeekChartIE, ZingMp3ChartMusicVideoIE, - ZingMp3UserIE, ZingMp3HubIE, + ZingMp3IE, ZingMp3LiveRadioIE, ZingMp3PodcastEpisodeIE, ZingMp3PodcastIE, + ZingMp3UserIE, + ZingMp3WeekChartIE, ) from .zoom import ZoomIE from .zype import ZypeIE diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index b21742281..2c0d296fd 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -6,10 +6,10 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - dict_get, ExtractorError, - js_to_json, + dict_get, int_or_none, + js_to_json, parse_iso8601, str_or_none, traverse_obj, diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index fee7375ea..b8c79b912 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -12,20 +12,21 @@ import urllib.request import urllib.response import uuid -from ..utils.networking import clean_proxies + from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..utils import ( ExtractorError, + OnDemandPagedList, bytes_to_intlist, decode_base_n, int_or_none, intlist_to_bytes, - OnDemandPagedList, time_seconds, traverse_obj, update_url_query, ) +from ..utils.networking import clean_proxies def add_opener(ydl, handler): # FIXME: Create proper API in .networking diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index c3b4f432e..07933192f 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -3,10 +3,10 @@ float_or_none, format_field, int_or_none, - str_or_none, - traverse_obj, parse_codecs, parse_qs, + str_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 898d37298..2f3b67dad 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -10,18 +10,18 @@ from ..compat import compat_b64decode from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, ass_subtitles_timecode, bytes_to_intlist, bytes_to_long, - ExtractorError, float_or_none, int_or_none, intlist_to_bytes, long_to_bytes, parse_iso8601, pkcs1pad, - strip_or_none, str_or_none, + strip_or_none, try_get, unified_strdate, urlencode_postdata, diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index d1525a1af..08e9e5182 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ISO639Utils, + OnDemandPagedList, float_or_none, int_or_none, - ISO639Utils, join_nonempty, - OnDemandPagedList, parse_duration, str_or_none, str_to_int, diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 0b73a966e..6cc63cd7f 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -5,7 +5,7 @@ int_or_none, mimetype2ext, parse_iso8601, - traverse_obj + traverse_obj, ) diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 87219f2f8..49df4bf3a 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -12,7 +12,6 @@ ) from ..utils.traversal import traverse_obj - _FIELDS = ''' _id clipImageSource diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index 8d5b472d3..f927965de 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( - parse_iso8601, + int_or_none, parse_duration, parse_filesize, - int_or_none, + parse_iso8601, ) diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index b785c62c3..cb2b9891e 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,17 +1,13 @@ import re from .common import InfoExtractor - -from ..compat import ( - compat_urlparse, -) - +from ..compat import compat_urlparse from ..utils import ( + ExtractorError, + clean_html, + int_or_none, urlencode_postdata, urljoin, - int_or_none, - clean_html, - ExtractorError ) diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 5018710e0..509b21a53 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from .youtube import YoutubeIE from .vimeo import VimeoIE +from .youtube import YoutubeIE from ..utils import ( int_or_none, parse_iso8601, diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 0d259c549..6b2bf2db2 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, mimetype2ext, parse_iso8601, diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 52f2ad057..5e78f372e 100644 --- a/yt_dlp/extractor/anchorfm.py +++ b/yt_dlp/extractor/anchorfm.py @@ -5,7 +5,7 @@ int_or_none, str_or_none, traverse_obj, - unified_timestamp + unified_timestamp, ) diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 306b3651e..9f5b9b523 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import url_or_none, merge_dicts +from ..utils import merge_dicts, url_or_none class AngelIE(InfoExtractor): diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index d00b0f906..433eb4ed8 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - str_to_int, - ExtractorError -) +from ..utils import ExtractorError, str_to_int class AppleConnectIE(InfoExtractor): diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 2e0b0a8c9..21103aee5 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index a493714d1..9a5524aab 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -4,8 +4,8 @@ compat_urllib_parse_urlparse, ) from ..utils import ( - format_field, float_or_none, + format_field, int_or_none, parse_iso8601, remove_start, diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index d60feba31..20ee34cca 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, float_or_none, jwt_encode_hs256, try_get, - ExtractorError, ) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index 6fc938de9..a8dfb3efc 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_str, + compat_urllib_parse_urlencode, ) from ..utils import ( format_field, diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 67af29a96..c4e07a79a 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -2,12 +2,12 @@ from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlparse, compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( - format_field, InAdvancePagedList, + format_field, traverse_obj, unified_timestamp, ) diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 51e722057..82dc9ab02 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( - try_get, - int_or_none, - url_or_none, float_or_none, + int_or_none, + try_get, unified_timestamp, + url_or_none, ) diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 042b3220b..da98ac314 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, str_or_none, diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index e875957cf..aa3d63ee7 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .amp import AMPIE +from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index 3d6e03304..ef0151de6 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -1,3 +1,4 @@ +from .common import InfoExtractor from ..utils import ( mimetype2ext, parse_duration, @@ -5,7 +6,6 @@ str_or_none, traverse_obj, ) -from .common import InfoExtractor class BloggerIE(InfoExtractor): diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 92f8ea2cb..267586687 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( extract_attributes, ) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index 51f9eb787..da06cc3f8 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - js_to_json, - traverse_obj, - unified_timestamp -) +from ..utils import js_to_json, traverse_obj, unified_timestamp class BoxCastVideoIE(InfoExtractor): diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 1200437e6..04b1dd80c 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -6,7 +6,7 @@ classproperty, int_or_none, traverse_obj, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 61b18412d..4190e1a09 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -12,10 +12,11 @@ ) from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + UnsupportedError, clean_html, dict_get, extract_attributes, - ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, @@ -29,7 +30,6 @@ try_get, unescapeHTML, unsmuggle_url, - UnsupportedError, update_url_query, url_or_none, ) diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py index 0bf8622c1..950a70a5e 100644 --- a/yt_dlp/extractor/brilliantpala.py +++ b/yt_dlp/extractor/brilliantpala.py @@ -27,8 +27,17 @@ def _get_logged_in_username(self, url, video_id): r'"username"\s*:\s*"(?P[^"]+)"', webpage, 'logged-in username') def _perform_login(self, username, password): - login_form = self._hidden_inputs(self._download_webpage( - self._LOGIN_API, None, 'Downloading login page')) + login_page, urlh = self._download_webpage_handle( + self._LOGIN_API, None, 'Downloading login page', expected_status=401) + if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API): + self.write_debug('Cookies are valid, no login required.') + return + + if urlh.status == 401: + self.write_debug('Got HTTP Error 401; cookies have been invalidated') + login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page') + + login_form = self._hidden_inputs(login_page) login_form.update({ 'username': username, 'password': password, diff --git a/yt_dlp/extractor/caffeinetv.py b/yt_dlp/extractor/caffeinetv.py new file mode 100644 index 000000000..aa107f858 --- /dev/null +++ b/yt_dlp/extractor/caffeinetv.py @@ -0,0 +1,74 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + parse_iso8601, + traverse_obj, + urljoin, +) + + +class CaffeineTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P[\da-f-]+)' + _TESTS = [{ + 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', + 'info_dict': { + 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', + 'ext': 'mp4', + 'title': 'GOOOOD MORNINNNNN #highlights', + 'timestamp': 1654702180, + 'upload_date': '20220608', + 'uploader': 'RahJON Wicc', + 'uploader_id': 'TsuSurf', + 'duration': 3145, + 'age_limit': 17, + 'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'tags': ['highlights', 'battlerap'], + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id) + broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {} + + video_url = broadcast_info['video_url'] + ext = determine_ext(video_url) + if ext == 'm3u8': + formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') + else: + formats = [{'url': video_url}] + + return { + 'id': video_id, + 'formats': formats, + **traverse_obj(json_data, { + 'like_count': ('like_count', {int_or_none}), + 'view_count': ('view_count', {int_or_none}), + 'comment_count': ('comment_count', {int_or_none}), + 'tags': ('tags', ..., {str}, {lambda x: x or None}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': (((None, 'user'), 'username'), {str}, any), + 'is_live': ('is_live', {bool}), + }), + **traverse_obj(broadcast_info, { + 'title': ('broadcast_title', {str}), + 'duration': ('content_duration', {int_or_none}), + 'timestamp': ('broadcast_start_time', {parse_iso8601}), + 'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}), + }), + 'age_limit': { + # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system + 'FOUR_PLUS': 0, + 'NINE_PLUS': 9, + 'TWELVE_PLUS': 12, + 'SEVENTEEN_PLUS': 17, + }.get(broadcast_info.get('content_rating'), 17), + } diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index cf830210f..aca9782c7 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -5,14 +5,14 @@ from ..utils import ( ExtractorError, extract_attributes, + find_xpath_attr, get_element_html_by_id, int_or_none, - find_xpath_attr, smuggle_url, - xpath_element, - xpath_text, update_url_query, url_or_none, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 156b6a324..5d6335729 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -101,7 +101,7 @@ def _real_extract(self, url): site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py index 9cffa11e8..745b71f24 100644 --- a/yt_dlp/extractor/cinetecamilano.py +++ b/yt_dlp/extractor/cinetecamilano.py @@ -1,4 +1,5 @@ import json + from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index 006a713b2..67b56e00d 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -1,11 +1,11 @@ +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, qualities, ) -import re - class ClippitIE(InfoExtractor): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 2964553e1..b0cf1e308 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,5 +1,6 @@ import base64 import collections +import functools import getpass import hashlib import http.client @@ -21,7 +22,6 @@ import urllib.request import xml.etree.ElementTree -from ..compat import functools # isort: split from ..compat import ( compat_etree_fromstring, compat_expanduser, @@ -2451,7 +2451,7 @@ def _parse_smil_formats_and_subtitles( }) continue - src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src) + src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src) src_url = src_url.strip() if proto == 'm3u8' or src_ext == 'm3u8': @@ -3398,23 +3398,16 @@ def manifest_url(manifest): return formats def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): - mobj = re.search( - r'''(?s)jwplayer\s*\(\s*(?P'|")(?!(?P=q)).+(?P=q)\s*\)(?!).*?\.\s*setup\s*\(\s*(?P(?:\([^)]*\)|[^)])+)\s*\)''', - webpage) - if mobj: - try: - jwplayer_data = self._parse_json(mobj.group('options'), - video_id=video_id, - transform_source=transform_source) - except ExtractorError: - pass - else: - if isinstance(jwplayer_data, dict): - return jwplayer_data + return self._search_json( + r'''(?'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!).)*?\.\s*(?:setup\s*\(|(?Pload)\s*\(\s*\[)''', + webpage, 'JWPlayer data', video_id, + # must be a {...} or sequence, ending + contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))', + transform_source=transform_source, default=None) - def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs): jwplayer_data = self._find_jwplayer_data( - webpage, video_id, transform_source=js_to_json) + webpage, video_id, transform_source=transform_source) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) @@ -3446,22 +3439,14 @@ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = track.get('kind') - if not track_kind or not isinstance(track_kind, str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) + for track in traverse_obj(video_data, ( + 'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))): + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) entry = { 'id': this_video_id, @@ -3546,7 +3531,7 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) + r'((?:mp4|mp3|flv):)', source_url, maxsplit=1) if len(rtmp_url_parts) == 3: rtmp_url, prefix, play_path = rtmp_url_parts a_format.update({ diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index bcc34ddd8..0a98c980f 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -1,7 +1,7 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( - dict_get, ExtractorError, + dict_get, float_or_none, int_or_none, ) diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index 1ef90b5a0..0cb7d940c 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, float_or_none, int_or_none, @@ -13,7 +14,6 @@ parse_age_limit, parse_duration, url_or_none, - ExtractorError ) diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 0075680e8..e56584e4e 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -1,10 +1,12 @@ import re from .common import InfoExtractor +from .senategov import SenateISVPIE +from .ustream import UstreamIE from ..compat import compat_HTMLParseError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, extract_attributes, find_xpath_attr, get_element_by_attribute, @@ -19,8 +21,6 @@ str_to_int, unescapeHTML, ) -from .senategov import SenateISVPIE -from .ustream import UstreamIE class CSpanIE(InfoExtractor): diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index cec178f03..1817bd2ff 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from ..utils import unified_timestamp from .youtube import YoutubeIE +from ..utils import unified_timestamp class CtsNewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 43401e111..4c25bea11 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - int_or_none, determine_protocol, + int_or_none, try_get, unescapeHTML, ) diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 5e14d6aff..2e0f6f0d3 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,8 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate from ..compat import compat_str +from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate class DamtomoBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py new file mode 100644 index 000000000..50e4136b5 --- /dev/null +++ b/yt_dlp/extractor/dangalplay.py @@ -0,0 +1,197 @@ +import hashlib +import json +import re +import time + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none +from ..utils.traversal import traverse_obj + + +class DangalPlayBaseIE(InfoExtractor): + _NETRC_MACHINE = 'dangalplay' + _OTV_USER_ID = None + _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _API_BASE = 'https://ottapi.dangalplay.com' + _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js + _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above + + def _perform_login(self, username, password): + if self._OTV_USER_ID: + return + if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + raise ExtractorError(self._LOGIN_HINT, expected=True) + self._OTV_USER_ID = password + + def _real_initialize(self): + if not self._OTV_USER_ID: + self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None) + + def _extract_episode_info(self, metadata, episode_slug, series_slug): + return { + 'display_id': episode_slug, + 'episode_number': int_or_none(self._search_regex( + r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)), + 'season_number': int_or_none(self._search_regex( + r'season-(\d+)', series_slug, 'season number', default='1')), + 'series': series_slug, + **traverse_obj(metadata, { + 'id': ('content_id', {str}), + 'title': ('display_title', {str}), + 'episode': ('title', {str}), + 'series': ('show_name', {str}, {lambda x: x or None}), + 'series_id': ('catalog_id', {str}), + 'duration': ('duration', {int_or_none}), + 'release_timestamp': ('release_date_uts', {int_or_none}), + }), + } + + def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}): + return self._download_json( + f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, + headers={'Accept': 'application/json'}, query={ + 'auth_token': self._AUTH_TOKEN, + 'region': 'IN', + **query, + }) + + +class DangalPlayIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P[^/?#]+)/(?P(?!episodes)[^/?#]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01', + 'info_dict': { + 'id': '647c61dc1e7171310dcd49b4', + 'ext': 'mp4', + 'release_timestamp': 1262304000, + 'episode_number': 1, + 'episode': 'EP 1 | KITANI MOHABBAT HAI SEASON 2', + 'series': 'kitani-mohabbat-hai-season-2', + 'season_number': 2, + 'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2', + 'release_date': '20100101', + 'duration': 2325, + 'season': 'Season 2', + 'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01', + 'series_id': '645c9ea41e717158ca574966', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01', + 'info_dict': { + 'id': '65d31d9ba73b9c3abd14a7f3', + 'ext': 'mp4', + 'episode': 'EP 1 | MILKE BHI HUM NA MILE', + 'release_timestamp': 1708367411, + 'episode_number': 1, + 'season': 'Season 1', + 'title': 'EP 1 | MILKE BHI HUM NA MILE', + 'duration': 156048, + 'release_date': '20240219', + 'season_number': 1, + 'series': 'MILKE BHI HUM NA MILE', + 'series_id': '645c9ea41e717158ca574966', + 'display_id': 'milke-bhi-hum-na-mile-ep-number-01', + }, + }] + + def _generate_api_data(self, data): + catalog_id = data['catalog_id'] + content_id = data['content_id'] + timestamp = str(int(time.time())) + unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY)) + + return json.dumps({ + 'catalog_id': catalog_id, + 'content_id': content_id, + 'category': '', + 'region': 'IN', + 'auth_token': self._AUTH_TOKEN, + 'id': self._OTV_USER_ID, + 'md5': hashlib.md5(unhashed.encode()).hexdigest(), + 'ts': timestamp, + }, separators=(',', ':')).encode() + + def _real_extract(self, url): + series_slug, episode_slug = self._match_valid_url(url).group('series', 'id') + metadata = self._call_api( + f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip', + episode_slug, query={'item_language': ''})['data'] + + try: + details = self._download_json( + f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug, + 'Downloading playback details JSON', headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, data=self._generate_api_data(metadata))['data'] + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 422: + error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} + if error_info.get('code') == '1016': + self.raise_login_required( + f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) + elif msg := error_info.get('message'): + raise ExtractorError(msg) + raise + + m3u8_url = traverse_obj(details, ( + ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any)) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4') + + return { + 'formats': formats, + 'subtitles': subtitles, + **self._extract_episode_info(metadata, episode_slug, series_slug), + } + + +class DangalPlaySeasonIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay:season' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P[^/?#]+)(?:/(?Pep-[^/?#]+)/episodes)?/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1', + 'playlist_mincount': 170, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes', + 'playlist_count': 30, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1', + }, + }, { + # 1 season only, series page is season page + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile', + 'playlist_mincount': 15, + 'info_dict': { + 'id': 'milke-bhi-hum-na-mile', + }, + }] + + def _entries(self, subcategories, series_slug): + for subcategory in subcategories: + data = self._call_api( + f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip', + series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={ + 'order_by': 'asc', + 'status': 'published', + }) + for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])): + episode_slug = ep['friendly_id'] + yield self.url_result( + f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}', + DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug)) + + def _real_extract(self, url): + series_slug, subcategory = self._match_valid_url(url).group('id', 'sub') + subcategories = [subcategory] if subcategory else traverse_obj( + self._call_api( + f'catalogs/shows/items/{series_slug}.gzip', series_slug, + 'Downloading season info JSON', query={'item_language': ''}), + ('data', 'subcategories', ..., 'friendly_id', {str})) + + return self.playlist_result( + self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory)) diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 1624d085c..177424937 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,11 +1,11 @@ -import re import os.path +import re from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - url_basename, remove_start, + url_basename, ) diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index c11cd790b..4380c414e 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( ExtractorError, parse_resolution, diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index 1f3d8e31c..b2663a63d 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, int_or_none, parse_age_limit, remove_end, diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index 430de326f..d8dde0ca7 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, - unified_strdate, determine_ext, + int_or_none, join_nonempty, + unified_strdate, update_url_query, ) diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index ee8893d5a..244ffdf1c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,5 +1,5 @@ -import time import hashlib +import time import urllib import uuid diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 363b4bec9..ddf2128b0 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, int_or_none, remove_start, @@ -355,12 +355,10 @@ def _download_video_playback_info(self, disco_base, video_id, headers): video_id, headers=headers, data=json.dumps({ 'deviceInfo': { 'adBlocker': False, + 'drmSupported': False, }, 'videoId': video_id, - 'wisteriaProperties': { - 'platform': 'desktop', - 'product': self._PRODUCT, - }, + 'wisteriaProperties': {}, }).encode('utf-8'))['data']['attributes']['streaming'] def _real_extract(self, url): @@ -878,10 +876,31 @@ def _update_disco_api_headers(self, headers, disco_base, display_id, realm): }) -class DiscoveryNetworksDeIE(DPlayBaseIE): +class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?(?P(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P[^/]+)/(?:video/)?(?P[^/]+)' _TESTS = [{ + 'url': 'https://dmax.de/sendungen/goldrausch-in-australien/german-gold', + 'info_dict': { + 'id': '4756322', + 'ext': 'mp4', + 'title': 'German Gold', + 'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6', + 'display_id': 'goldrausch-in-australien/german-gold', + 'episode': 'Episode 1', + 'episode_number': 1, + 'season': 'Season 5', + 'season_number': 5, + 'series': 'Goldrausch in Australien', + 'duration': 2648.0, + 'upload_date': '20230517', + 'timestamp': 1684357500, + 'creators': ['DMAX'], + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg', + 'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'info_dict': { 'id': '78867', @@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE): 'season_number': 1, 'thumbnail': r're:https://.+\.jpg', }, - 'params': { - 'skip_download': True, - }, + 'skip': '404 Not Found', }, { 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', 'only_matching': True, @@ -920,8 +937,14 @@ def _real_extract(self, url): country = 'GB' if domain == 'dplay.co.uk' else 'DE' realm = 'questuk' if country == 'GB' else domain.replace('.', '') return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) + url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) class DiscoveryPlusShowBaseIE(DPlayBaseIE): diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index e5dab6ac0..a9247edc0 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, NO_DEFAULT, + int_or_none, parse_duration, str_to_int, ) diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index 626e577e7..adc7705bc 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -5,9 +5,9 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + ExtractorError, clean_html, extract_attributes, - ExtractorError, get_elements_by_class, int_or_none, js_to_json, diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index e67143370..e6660dcd9 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -2,15 +2,15 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, join_nonempty, js_to_json, mimetype2ext, + parse_iso8601, try_get, unescapeHTML, - parse_iso8601, ) diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index f7b852076..feab804af 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -1,10 +1,10 @@ from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, unified_strdate, url_or_none, ) -from ..compat import compat_urlparse class DWIE(InfoExtractor): diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 9ecdf5d3b..19c6933e7 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -4,15 +4,15 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, dict_get, int_or_none, merge_dicts, - parse_qs, parse_age_limit, parse_iso8601, + parse_qs, str_or_none, try_get, url_or_none, diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 29dfc8ae9..0cf889a1e 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -8,7 +8,7 @@ qualities, traverse_obj, unified_strdate, - xpath_text + xpath_text, ) diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 65a1dc7c5..66fa42fa1 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -1,8 +1,7 @@ from .common import InfoExtractor - from ..utils import ( - parse_duration, js_to_json, + parse_duration, ) diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py index d8b068e9c..4a13ab08d 100644 --- a/yt_dlp/extractor/eyedotv.py +++ b/yt_dlp/extractor/eyedotv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, - parse_duration, ExtractorError, + parse_duration, + xpath_text, ) diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index cddf25497..1e80f9a37 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -1,12 +1,6 @@ from .common import InfoExtractor - from ..compat import compat_str -from ..utils import ( - parse_iso8601, - ExtractorError, - try_get, - mimetype2ext -) +from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get class FancodeVodIE(InfoExtractor): diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py index bca62add9..796bac3c3 100644 --- a/yt_dlp/extractor/faz.py +++ b/yt_dlp/extractor/faz.py @@ -3,9 +3,9 @@ from .common import InfoExtractor from ..compat import compat_etree_fromstring from ..utils import ( + int_or_none, xpath_element, xpath_text, - int_or_none, ) diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py index 8175b6b0f..b2dbb92d5 100644 --- a/yt_dlp/extractor/fczenit.py +++ b/yt_dlp/extractor/fczenit.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, ) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index f604cbd40..ae837f6a0 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, traverse_obj, diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index 0cd18f494..69ca87c84 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -2,10 +2,10 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + int_or_none, qualities, strip_or_none, - int_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index f9d22fd33..c10d290dc 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -7,7 +7,7 @@ parse_codecs, parse_duration, str_to_int, - unified_timestamp + unified_timestamp, ) diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index 1d3c0b110..b284e1e28 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -10,7 +10,7 @@ int_or_none, str_or_none, traverse_obj, - try_get + try_get, ) diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py index bc56b03e3..6403be8cf 100644 --- a/yt_dlp/extractor/gaskrank.py +++ b/yt_dlp/extractor/gaskrank.py @@ -1,4 +1,5 @@ import re + from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py new file mode 100644 index 000000000..bb1554eea --- /dev/null +++ b/yt_dlp/extractor/gbnews.py @@ -0,0 +1,107 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + extract_attributes, + get_elements_html_by_class, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class GBNewsIE(InfoExtractor): + IE_DESC = 'GB News clips, features and live streams' + _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P[^#?]+)' + + _PLATFORM = 'safari' + _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php' + _TESTS = [{ + 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row', + 'info_dict': { + 'id': '52264136', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row', + 'description': 'The post was criticised by former employers of the broadcaster', + 'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism', + }, + }, { + 'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal', + 'info_dict': { + 'id': '52328390', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal', + 'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family', + 'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'', + } + }, { + 'url': 'https://www.gbnews.uk/watchlive', + 'info_dict': { + 'id': '1069', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'watchlive', + 'live_status': 'is_live', + 'title': r're:^GB News Live', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + @functools.lru_cache + def _get_ss_endpoint(self, data_id, data_env): + if not data_id: + data_id = 'GB003' + if not data_env: + data_env = 'production' + + json_data = self._download_json( + self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={ + 'id': data_id, + 'env': data_env, + }) + meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none})) + if not meta_url: + raise ExtractorError('No API host found') + + return meta_url + + def _real_extract(self, url): + display_id = self._match_id(url).rpartition('/')[2] + webpage = self._download_webpage(url, display_id) + + video_data = None + elements = get_elements_html_by_class('simplestream', webpage) + for html_tag in elements: + attributes = extract_attributes(html_tag) + if 'sidebar' not in (attributes.get('class') or ''): + video_data = attributes + if not video_data: + raise ExtractorError('Could not find video element', expected=True) + + endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env')) + + uvid = video_data['data-uvid'] + video_type = video_data.get('data-type') + if not video_type or video_type == 'vod': + video_type = 'show' + stream_data = self._download_json( + f'{endpoint_url}/api/{video_type}/stream/{uvid}', + uvid, 'Downloading stream JSON', query={ + 'key': video_data.get('data-key'), + 'platform': self._PLATFORM, + }) + if traverse_obj(stream_data, 'drm'): + self.report_drm(uvid) + + return { + 'id': uvid, + 'display_id': display_id, + 'title': self._og_search_title(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + 'formats': self._extract_m3u8_formats(traverse_obj(stream_data, ( + 'response', 'stream', {url_or_none})), uvid, 'mp4'), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'is_live': video_type == 'live', + } diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2cfed0fd0..2818c718d 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -4,7 +4,7 @@ import urllib.parse import xml.etree.ElementTree -from .common import InfoExtractor # isort: split +from .common import InfoExtractor from .commonprotocols import RtmpIE from .youtube import YoutubeIE from ..compat import compat_etree_fromstring diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 7795dc56f..b9dc7c63c 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - bool_or_none, ExtractorError, + bool_or_none, dict_get, float_or_none, int_or_none, diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py index c5bc86bb4..7baf8de8d 100644 --- a/yt_dlp/extractor/gigya.py +++ b/yt_dlp/extractor/gigya.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( ExtractorError, urlencode_postdata, diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 22aac0db9..515f3c567 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -3,9 +3,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, int_or_none, parse_qs, smuggle_url, diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index b075a02e0..fba98d79f 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -3,16 +3,16 @@ from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( - int_or_none, - determine_ext, - parse_age_limit, - remove_start, - remove_end, - try_get, - urlencode_postdata, ExtractorError, - unified_timestamp, + determine_ext, + int_or_none, + parse_age_limit, + remove_end, + remove_start, traverse_obj, + try_get, + unified_timestamp, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index f010fff36..276a6c7fe 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -4,7 +4,7 @@ determine_ext, str_or_none, unified_timestamp, - url_or_none + url_or_none, ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index c6eca0c4d..fac088462 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -1,10 +1,7 @@ import hashlib from .common import InfoExtractor -from ..utils import ( - ExtractorError, - try_get -) +from ..utils import ExtractorError, try_get class GofileIE(InfoExtractor): diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py index 112293bef..9c1a6cb91 100644 --- a/yt_dlp/extractor/gotostage.py +++ b/yt_dlp/extractor/gotostage.py @@ -1,11 +1,8 @@ +import json + from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - try_get, - url_or_none -) - -import json +from ..utils import try_get, url_or_none class GoToStageIE(InfoExtractor): diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index 530bdb727..2551cfffd 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, - xpath_element, int_or_none, parse_duration, urljoin, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index c7da8f97d..eb0a77952 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, KNOWN_EXTENSIONS, + determine_ext, str_to_int, ) diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index e026996da..099c2a175 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, merge_dicts, parse_count, diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index 57b76e46b..41d50d000 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -4,8 +4,8 @@ from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, parse_age_limit, try_get, diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index c4965f9bc..5379b5410 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -2,8 +2,8 @@ import random import re -from ..compat import compat_urlparse, compat_b64decode - +from .common import InfoExtractor +from ..compat import compat_b64decode, compat_urlparse from ..utils import ( ExtractorError, int_or_none, @@ -13,8 +13,6 @@ update_url_query, ) -from .common import InfoExtractor - class HuyaLiveIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P[^/#?&]+)(?:\D|$)' diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index 9d55ddc02..c28d09f34 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate from ..compat import compat_str +from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate class IchinanaLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 192bcfe35..2bb48508c 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,3 +1,4 @@ +from .bokecc import BokeCCBaseIE from ..compat import ( compat_b64decode, compat_urllib_parse_unquote, @@ -6,10 +7,9 @@ from ..utils import ( ExtractorError, determine_ext, - update_url_query, traverse_obj, + update_url_query, ) -from .bokecc import BokeCCBaseIE class InfoQIE(BokeCCBaseIE): diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index f7aa579b3..d5a3d8095 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -3,12 +3,12 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, js_to_json, - urlencode_postdata, - ExtractorError, parse_qs, - traverse_obj + traverse_obj, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 3368ab1d9..85ed549de 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -4,20 +4,16 @@ import time from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_unquote -) from .openload import PhantomJSwrapper +from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode from ..utils import ( + ExtractorError, clean_html, decode_packed_codes, - ExtractorError, float_or_none, format_field, - get_element_by_id, get_element_by_attribute, + get_element_by_id, int_or_none, js_to_json, ohdave_rsa_encrypt, diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py index 713fd4ec5..5d6fbaa01 100644 --- a/yt_dlp/extractor/itprotv.py +++ b/yt_dlp/extractor/itprotv.py @@ -1,12 +1,11 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, str_or_none, traverse_obj, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 9ac7be307..55c416521 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -1,23 +1,22 @@ import json -from .common import InfoExtractor from .brightcove import BrightcoveNewIE - +from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + JSON_LD_RE, + ExtractorError, base_url, clean_html, determine_ext, extract_attributes, - ExtractorError, get_element_by_class, - JSON_LD_RE, merge_dicts, parse_duration, smuggle_url, try_get, - url_or_none, url_basename, + url_or_none, urljoin, ) diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index e23fdfd6a..a11f3f11d 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -1,9 +1,9 @@ import functools -import urllib.parse -import urllib.error import hashlib import json import time +import urllib.error +import urllib.parse from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index a2bbba397..8557a81ad 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -1,8 +1,8 @@ import hashlib import random -from ..compat import compat_str from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( clean_html, int_or_none, diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py index 6c650568a..19d2b923b 100644 --- a/yt_dlp/extractor/japandiet.py +++ b/yt_dlp/extractor/japandiet.py @@ -1,5 +1,6 @@ import re +from .common import InfoExtractor from ..utils import ( ExtractorError, clean_html, @@ -9,9 +10,8 @@ smuggle_url, traverse_obj, try_call, - unsmuggle_url + unsmuggle_url, ) -from .common import InfoExtractor def _parse_japanese_date(text): diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py new file mode 100644 index 000000000..e7186d75c --- /dev/null +++ b/yt_dlp/extractor/jiocinema.py @@ -0,0 +1,403 @@ +import base64 +import itertools +import json +import random +import re +import string +import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + jwt_decode_hs256, + parse_age_limit, + try_call, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class JioCinemaBaseIE(InfoExtractor): + _NETRC_MACHINE = 'jiocinema' + _GEO_BYPASS = False + _ACCESS_TOKEN = None + _REFRESH_TOKEN = None + _GUEST_TOKEN = None + _USER_ID = None + _DEVICE_ID = None + _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'} + _APP_NAME = {'appName': 'RJIL_JioCinema'} + _APP_VERSION = {'appVersion': '5.0.0'} + _API_SIGNATURES = 'o668nxgzwff' + _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi' + _ACCESS_HINT = 'the `accessToken` from your browser local storage' + _LOGIN_HINT = ( + 'Log in with "-u phone -p " to authenticate with OTP, ' + f'or use "-u token -p " to log in with {_ACCESS_HINT}. ' + 'If you have previously logged in with yt-dlp and your session ' + 'has been cached, you can use "-u device -p "') + + def _cache_token(self, token_type): + assert token_type in ('access', 'refresh', 'all') + if token_type in ('access', 'all'): + self.cache.store( + JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN) + if token_type in ('refresh', 'all'): + self.cache.store( + JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN) + + def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}): + return self._download_json( + url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + **self._API_HEADERS, + **headers, + }, expected_status=(400, 403, 474)) + + def _call_auth_api(self, service, endpoint, note, headers={}, data={}): + return self._call_api( + f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}', + None, note=note, headers=headers, data=data) + + def _refresh_token(self): + if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID: + raise ExtractorError('User token has expired', expected=True) + response = self._call_auth_api( + 'token', 'refreshtoken', 'Refreshing token', + headers={'accesstoken': self._ACCESS_TOKEN}, data={ + **self._APP_NAME, + 'deviceId': self._DEVICE_ID, + 'refreshToken': self._REFRESH_TOKEN, + **self._APP_VERSION, + }) + refresh_token = response.get('refreshTokenId') + if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN: + JioCinemaBaseIE._REFRESH_TOKEN = refresh_token + self._cache_token('refresh') + JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] + self._cache_token('access') + + def _fetch_guest_token(self): + JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10)) + guest_token = self._call_auth_api( + 'token', 'guest', 'Downloading guest token', data={ + **self._APP_NAME, + 'deviceType': 'phone', + 'os': 'ios', + 'deviceId': self._DEVICE_ID, + 'freshLaunch': False, + 'adId': self._DEVICE_ID, + **self._APP_VERSION, + }) + self._GUEST_TOKEN = guest_token['authToken'] + self._USER_ID = guest_token['userId'] + + def _call_login_api(self, endpoint, guest_token, data, note): + return self._call_auth_api( + 'user', f'loginotp/{endpoint}', note, headers={ + **self.geo_verification_headers(), + 'accesstoken': self._GUEST_TOKEN, + **self._APP_NAME, + **traverse_obj(guest_token, 'data', { + 'deviceType': ('deviceType', {str}), + 'os': ('os', {str}), + })}, data=data) + + def _is_token_expired(self, token): + return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180) + + def _perform_login(self, username, password): + if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN): + return + + UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' + + if username.lower() == 'token': + if try_call(lambda: jwt_decode_hs256(password)): + JioCinemaBaseIE._ACCESS_TOKEN = password + refresh_hint = 'the `refreshToken` UUID from your browser local storage' + refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0] + if not refresh_token: + self.to_screen( + 'To extend the life of your login session, in addition to your access token, ' + 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" ' + f'where REFRESH_TOKEN is {refresh_hint}') + elif re.fullmatch(UUID_RE, refresh_token): + JioCinemaBaseIE._REFRESH_TOKEN = refresh_token + else: + self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}') + else: + raise ExtractorError( + f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True) + + elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password): + JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh') + JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access') + if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN: + raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True) + + elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password): + self._fetch_guest_token() + guest_token = jwt_decode_hs256(self._GUEST_TOKEN) + initial_data = { + 'number': base64.b64encode(password.encode()).decode(), + **self._APP_VERSION, + } + response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP') + if not traverse_obj(response, ('OTPInfo', {dict})): + raise ExtractorError('There was a problem with the phone number login attempt') + + is_iphone = guest_token.get('os') == 'ios' + response = self._call_login_api('verify', guest_token, { + 'deviceInfo': { + 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android', + 'info': { + 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'}, + 'androidId': self._DEVICE_ID, + 'type': 'iOS' if is_iphone else 'Android' + } + }, + **initial_data, + 'otp': self._get_tfa_info('the one-time password sent to your phone') + }, 'Submitting OTP') + if traverse_obj(response, 'code') == 1043: + raise ExtractorError('Wrong OTP', expected=True) + JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken'] + JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] + + else: + raise ExtractorError(self._LOGIN_HINT, expected=True) + + user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data'] + JioCinemaBaseIE._USER_ID = user_token['userId'] + JioCinemaBaseIE._DEVICE_ID = user_token['deviceId'] + if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device': + self._cache_token('all') + if self.get_param('cachedir') is not False: + self.to_screen( + f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"') + elif not JioCinemaBaseIE._REFRESH_TOKEN: + JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load( + JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh') + if JioCinemaBaseIE._REFRESH_TOKEN: + self._cache_token('access') + self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"') + if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN): + self._refresh_token() + + +class JioCinemaIE(JioCinemaBaseIE): + IE_NAME = 'jiocinema' + _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P\d{3,})' + _TESTS = [{ + 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931', + 'info_dict': { + 'id': '3759931', + 'ext': 'mp4', + 'title': 'Pradeep to stop the wedding?', + 'description': 'md5:75f72d1d1a66976633345a3de6d672b1', + 'episode': 'Pradeep to stop the wedding?', + 'episode_number': 89, + 'season': 'Agnisakshi…Ek Samjhauta-S1', + 'season_number': 1, + 'series': 'Agnisakshi Ek Samjhauta', + 'duration': 1238.0, + 'thumbnail': r're:https?://.+\.jpg', + 'age_limit': 13, + 'season_id': '3698031', + 'upload_date': '20230606', + 'timestamp': 1686009600, + 'release_date': '20230607', + 'genres': ['Drama'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch', + 'info_dict': { + 'id': '3754021', + 'ext': 'mp4', + 'title': 'Bhediya', + 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0', + 'episode': 'Bhediya', + 'duration': 8500.0, + 'thumbnail': r're:https?://.+\.jpg', + 'age_limit': 13, + 'upload_date': '20230525', + 'timestamp': 1685026200, + 'release_date': '20230524', + 'genres': ['Comedy'], + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _extract_formats_and_subtitles(self, playback, video_id): + m3u8_url = traverse_obj(playback, ( + 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any)) + if not m3u8_url: # DRM-only content only serves dash urls + self.report_drm(video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls') + self._remove_duplicate_formats(formats) + + return { + # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p + 'formats': traverse_obj(formats, ( + lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), + 'subtitles': subtitles, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN): + self._fetch_guest_token() + elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN): + self._refresh_token() + + playback = self._call_api( + f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id, + 'Downloading playback JSON', headers={ + **self.geo_verification_headers(), + 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN, + **self._APP_NAME, + 'deviceid': self._DEVICE_ID, + 'uniqueid': self._USER_ID, + 'x-apisignatures': self._API_SIGNATURES, + 'x-platform': 'androidweb', + 'x-platform-token': 'web', + }, data={ + '4k': False, + 'ageGroup': '18+', + 'appVersion': '3.4.0', + 'bitrateProfile': 'xhdpi', + 'capability': { + 'drmCapability': { + 'aesSupport': 'yes', + 'fairPlayDrmSupport': 'none', + 'playreadyDrmSupport': 'none', + 'widevineDRMSupport': 'none' + }, + 'frameRateCapability': [{ + 'frameRateSupport': '30fps', + 'videoQuality': '1440p' + }] + }, + 'continueWatchingRequired': False, + 'dolby': False, + 'downloadRequest': False, + 'hevc': False, + 'kidsSafe': False, + 'manufacturer': 'Windows', + 'model': 'Windows', + 'multiAudioRequired': True, + 'osVersion': '10', + 'parentalPinValid': True, + 'x-apisignatures': self._API_SIGNATURES + }) + + status_code = traverse_obj(playback, ('code', {int})) + if status_code == 474: + self.raise_geo_restricted(countries=['IN']) + elif status_code == 1008: + error_msg = 'This content is only available for premium users' + if self._ACCESS_TOKEN: + raise ExtractorError(error_msg, expected=True) + self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None) + elif status_code == 400: + raise ExtractorError('The requested content is not available', expected=True) + elif status_code is not None and status_code != 200: + raise ExtractorError( + f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}') + + metadata = self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details', + video_id, fatal=False, query={ + 'ids': f'include:{video_id}', + 'responseType': 'common', + 'devicePlatformType': 'desktop', + }) + + return { + 'id': video_id, + 'http_headers': self._API_HEADERS, + **self._extract_formats_and_subtitles(playback, video_id), + **traverse_obj(playback, ('data', { + # fallback metadata + 'title': ('name', {str}), + 'description': ('fullSynopsis', {str}), + 'series': ('show', 'name', {str}, {lambda x: x or None}), + 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), + 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}), + 'episode': ('fullTitle', {str}), + 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}), + 'age_limit': ('ageNemonic', {parse_age_limit}), + 'duration': ('totalDuration', {float_or_none}), + 'thumbnail': ('images', {url_or_none}), + })), + **traverse_obj(metadata, ('result', 0, { + 'title': ('fullTitle', {str}), + 'description': ('fullSynopsis', {str}), + 'series': ('showName', {str}, {lambda x: x or None}), + 'season': ('seasonName', {str}, {lambda x: x or None}), + 'season_number': ('season', {int_or_none}), + 'season_id': ('seasonId', {str}, {lambda x: x or None}), + 'episode': ('fullTitle', {str}), + 'episode_number': ('episode', {int_or_none}), + 'timestamp': ('uploadTime', {int_or_none}), + 'release_date': ('telecastDate', {str}), + 'age_limit': ('ageNemonic', {parse_age_limit}), + 'duration': ('duration', {float_or_none}), + 'genres': ('genres', ..., {str}), + 'thumbnail': ('seo', 'ogImage', {url_or_none}), + })), + } + + +class JioCinemaSeriesIE(JioCinemaBaseIE): + IE_NAME = 'jiocinema:series' + _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P[\w-]+)/(?P\d{3,})' + _TESTS = [{ + 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917', + 'info_dict': { + 'id': '3499917', + 'title': 'naagin', + }, + 'playlist_mincount': 120, + }] + + def _entries(self, series_id): + seasons = self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id, + 'Downloading series metadata JSON', query={ + 'sort': 'season:asc', + 'id': series_id, + 'responseType': 'common', + }) + + for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1): + season_id = season['id'] + label = season.get('season') or season_num + for page_num in itertools.count(1): + episodes = traverse_obj(self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', + season_id, f'Downloading season {label} page {page_num} JSON', query={ + 'sort': 'episode:asc', + 'id': season_id, + 'responseType': 'common', + 'page': page_num, + }), ('result', lambda _, v: v['id'] and url_or_none(v['slug']))) + if not episodes: + break + for episode in episodes: + yield self.url_result( + episode['slug'], JioCinemaIE, **traverse_obj(episode, { + 'video_id': 'id', + 'video_title': ('fullTitle', {str}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + })) + + def _real_extract(self, url): + slug, series_id = self._match_valid_url(url).group('slug', 'id') + return self.playlist_result(self._entries(series_id), series_id, slug) diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py index 245fe73d4..8069fea4c 100644 --- a/yt_dlp/extractor/jove.py +++ b/yt_dlp/extractor/jove.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unified_strdate -) +from ..utils import ExtractorError, unified_strdate class JoveIE(InfoExtractor): diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py index 3e2e62712..00ac7ccca 100644 --- a/yt_dlp/extractor/jstream.py +++ b/yt_dlp/extractor/jstream.py @@ -1,6 +1,6 @@ import base64 -import re import json +import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 43055e89d..563aa2d72 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -3,8 +3,8 @@ from ..utils import ( ExtractorError, int_or_none, - strip_or_none, str_or_none, + strip_or_none, traverse_obj, unified_timestamp, ) diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index 95e2deea5..4752d5a55 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -4,18 +4,18 @@ from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_parse_qs, + compat_urlparse, ) from ..utils import ( - clean_html, ExtractorError, + clean_html, format_field, int_or_none, - unsmuggle_url, + remove_start, smuggle_url, traverse_obj, - remove_start + unsmuggle_url, ) diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py index 8f247b305..3d74c745c 100644 --- a/yt_dlp/extractor/kankanews.py +++ b/yt_dlp/extractor/kankanews.py @@ -1,7 +1,7 @@ -import time +import hashlib import random import string -import hashlib +import time import urllib.parse from .common import InfoExtractor diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py index 3c93dedac..b77667160 100644 --- a/yt_dlp/extractor/kuwo.py +++ b/yt_dlp/extractor/kuwo.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - get_element_by_id, - clean_html, ExtractorError, InAdvancePagedList, + clean_html, + get_element_by_id, remove_start, ) diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py index e7d2f8a24..708cb548d 100644 --- a/yt_dlp/extractor/lci.py +++ b/yt_dlp/extractor/lci.py @@ -1,9 +1,25 @@ from .common import InfoExtractor +from .wat import WatIE +from ..utils import ExtractorError, int_or_none +from ..utils.traversal import traverse_obj class LCIIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P\d+)\.html' _TESTS = [{ + 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html', + 'info_dict': { + 'id': '14113788', + 'ext': 'mp4', + 'title': '24H Pujadas du vendredi 24 mai 2024', + 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg', + 'upload_date': '20240524', + 'duration': 6158, + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', 'info_dict': { 'id': '13875948', @@ -24,5 +40,10 @@ class LCIIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id') - return self.url_result('wat:' + wat_id, 'Wat', wat_id) + next_data = self._search_nextjs_data(webpage, video_id) + wat_id = traverse_obj(next_data, ( + 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any)) + if wat_id is None: + raise ExtractorError('Could not find wat_id') + + return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id)) diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py index 9846319e0..62874195f 100644 --- a/yt_dlp/extractor/lcp.py +++ b/yt_dlp/extractor/lcp.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .arkena import ArkenaIE +from .common import InfoExtractor class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py index 10fb5d479..1a3ada1e5 100644 --- a/yt_dlp/extractor/lecture2go.py +++ b/yt_dlp/extractor/lecture2go.py @@ -4,8 +4,8 @@ from ..utils import ( determine_ext, determine_protocol, - parse_duration, int_or_none, + parse_duration, ) diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 629d208fc..90f0268d7 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, float_or_none, int_or_none, str_or_none, diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index 5d61a607f..a113b3d0d 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -11,9 +11,9 @@ compat_urllib_parse_urlencode, ) from ..utils import ( + ExtractorError, determine_ext, encode_data_uri, - ExtractorError, int_or_none, orderedSet, parse_iso8601, diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py index b76ca0908..297993939 100644 --- a/yt_dlp/extractor/libraryofcongress.py +++ b/yt_dlp/extractor/libraryofcongress.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( determine_ext, float_or_none, diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py index 919cfcb37..ea150a58b 100644 --- a/yt_dlp/extractor/lifenews.py +++ b/yt_dlp/extractor/lifenews.py @@ -6,8 +6,8 @@ compat_urlparse, ) from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, remove_end, diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 4e50f106f..1ff091ddb 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -3,13 +3,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, float_or_none, int_or_none, smuggle_url, try_get, unsmuggle_url, - ExtractorError, ) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index e12f467ef..2a7c6f0e0 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -7,8 +7,8 @@ extract_attributes, float_or_none, int_or_none, - srt_subtitles_timecode, mimetype2ext, + srt_subtitles_timecode, traverse_obj, try_get, url_or_none, diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py index fd9bba8bc..fa12a6a8d 100644 --- a/yt_dlp/extractor/mainstreaming.py +++ b/yt_dlp/extractor/mainstreaming.py @@ -1,14 +1,13 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, js_to_json, parse_duration, traverse_obj, try_get, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py index 2792e6e70..44c321c26 100644 --- a/yt_dlp/extractor/manoto.py +++ b/yt_dlp/extractor/manoto.py @@ -1,10 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - clean_html, - int_or_none, - traverse_obj -) - +from ..utils import clean_html, int_or_none, traverse_obj _API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}' diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 675ad8ccc..d040fb48f 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -4,8 +4,8 @@ from ..compat import compat_str from ..utils import ( ExtractorError, - format_field, float_or_none, + format_field, int_or_none, str_or_none, traverse_obj, diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index fcc4827b5..c01597762 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,14 +1,11 @@ +from .common import InfoExtractor +from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ( ExtractorError, traverse_obj, unified_strdate, url_or_none, ) -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_str -) class MediaKlikkIE(InfoExtractor): diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index e04a1ce90..b7df5c75a 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -5,11 +5,11 @@ from ..utils import ( ExtractorError, GeoRestrictedError, - int_or_none, OnDemandPagedList, + int_or_none, try_get, - urljoin, update_url_query, + urljoin, ) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index 7ea78ab69..d3fec4ec2 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import ( @@ -10,16 +10,15 @@ ExtractorError, float_or_none, mimetype2ext, + smuggle_url, str_or_none, try_call, try_get, - smuggle_url, unsmuggle_url, url_or_none, urljoin, ) - _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 5f5f16087..f6a0b416d 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..utils import ( merge_dicts, - parse_iso8601, parse_duration, + parse_iso8601, parse_resolution, try_get, url_basename, diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index f64d575dc..caf60c805 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, + OnDemandPagedList, determine_ext, dict_get, - ExtractorError, float_or_none, - OnDemandPagedList, traverse_obj, ) diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 38cc0c274..979584ed6 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -1,11 +1,11 @@ -import re import json +import re from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( - clean_html, ExtractorError, + clean_html, get_element_by_id, ) diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index a69a12e18..411d41cb0 100644 --- a/yt_dlp/extractor/monstercat.py +++ b/yt_dlp/extractor/monstercat.py @@ -8,10 +8,10 @@ get_element_html_by_class, get_element_text_and_html_by_tag, int_or_none, - unified_strdate, strip_or_none, traverse_obj, try_call, + unified_strdate, ) diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 35c57bc70..ed5be4fa6 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -1,5 +1,5 @@ -from .dailymotion import DailymotionIE from .common import InfoExtractor +from .dailymotion import DailymotionIE class MoviepilotIE(InfoExtractor): diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index cdd8ba4dc..6e0ea2652 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - unescapeHTML, parse_duration, + unescapeHTML, ) diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index 77d1806a3..79728e106 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, unescapeHTML, ) diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index edc41443a..8a8a5fec7 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - unified_timestamp, extract_attributes, + unified_timestamp, ) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index 885557e91..26400e383 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -4,8 +4,8 @@ import itertools import json import re -import urllib.parse import time +import urllib.parse from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 81d11e3a5..ec4d6368e 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -7,9 +7,9 @@ compat_urllib_parse_unquote, ) from ..utils import ( + OnDemandPagedList, int_or_none, merge_dicts, - OnDemandPagedList, parse_duration, parse_iso8601, parse_qs, diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 267fa8353..e88f98abf 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -3,9 +3,9 @@ import re import xml.etree.ElementTree +from .adobepass import AdobePassIE from .common import InfoExtractor from .theplatform import ThePlatformIE, default_ns -from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..utils import ( diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 41ea3629a..243221d46 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, merge_dicts, parse_iso8601, diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index febad8fdf..be732a32f 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -1,11 +1,5 @@ from .common import InfoExtractor - - -from ..utils import ( - try_get, - unified_strdate, - unified_timestamp -) +from ..utils import try_get, unified_strdate, unified_timestamp class NFHSNetworkIE(InfoExtractor): diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index 2521c40e0..64cddb408 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -3,8 +3,8 @@ from ..utils import ( determine_ext, int_or_none, - parse_iso8601, parse_duration, + parse_iso8601, ) diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py index 900d9ba60..0b4f47b48 100644 --- a/yt_dlp/extractor/ninenews.py +++ b/yt_dlp/extractor/ninenews.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .brightcove import BrightcoveNewIE +from .common import InfoExtractor from ..utils import ExtractorError from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index c655b75f4..b7170b0e7 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -2,8 +2,8 @@ from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, smuggle_url, str_or_none, try_get, diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 35d1311dc..249e7cd33 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,13 +1,14 @@ +import random +import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - parse_count, - unified_timestamp, - remove_end, determine_ext, + parse_count, + remove_end, + unified_timestamp, ) -import re -import random class NitterIE(InfoExtractor): diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index cddc72f71..513529bea 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,11 +1,11 @@ from .common import InfoExtractor from ..utils import ( - js_to_json, - mimetype2ext, determine_ext, - update_url_query, get_element_by_attribute, int_or_none, + js_to_json, + mimetype2ext, + update_url_query, ) diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index c7b803803..19cb972c0 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,11 +1,11 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - find_xpath_attr, - xpath_text, - update_url_query, -) from ..compat import compat_urllib_parse_unquote +from ..utils import ( + find_xpath_attr, + int_or_none, + update_url_query, + xpath_text, +) class NozIE(InfoExtractor): diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index ec54041f1..5670445aa 100644 --- a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -1,9 +1,5 @@ from .common import InfoExtractor - -from ..utils import ( - float_or_none, - xpath_text -) +from ..utils import float_or_none, xpath_text class NuevoBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 6ac351cb0..0ef0ec70b 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, int_or_none, + parse_duration, strip_or_none, traverse_obj, url_or_none, diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index 062f9a875..0a12aea71 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -3,10 +3,7 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - ExtractorError, - traverse_obj -) +from ..utils import ExtractorError, traverse_obj class NZHeraldIE(InfoExtractor): diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py index b852160b9..8321b0741 100644 --- a/yt_dlp/extractor/odkmedia.py +++ b/yt_dlp/extractor/odkmedia.py @@ -7,7 +7,7 @@ GeoRestrictedError, float_or_none, traverse_obj, - try_call + try_call, ) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 61d1f4048..5507d2fda 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get -) +from ..utils import int_or_none, try_get class OlympicsReplayIE(InfoExtractor): diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py index a46211e77..351b397de 100644 --- a/yt_dlp/extractor/onenewsnz.py +++ b/yt_dlp/extractor/onenewsnz.py @@ -1,10 +1,6 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor - -from ..utils import ( - ExtractorError, - traverse_obj -) +from ..utils import ExtractorError, traverse_obj class OneNewsNZIE(InfoExtractor): diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py index 0d59e8cb4..da10f3779 100644 --- a/yt_dlp/extractor/onet.py +++ b/yt_dlp/extractor/onet.py @@ -2,13 +2,13 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, + NO_DEFAULT, ExtractorError, + determine_ext, float_or_none, get_element_by_class, int_or_none, js_to_json, - NO_DEFAULT, parse_iso8601, remove_start, strip_or_none, diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index 1fafd9afb..12bf55704 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, traverse_obj, diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index 82a81c6c2..c9a96aeb4 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, get_first, @@ -8,7 +9,6 @@ unified_strdate, unified_timestamp, ) -from ..compat import compat_str class OpenRecBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py index d49909d52..0e7a8484e 100644 --- a/yt_dlp/extractor/ora.py +++ b/yt_dlp/extractor/ora.py @@ -1,4 +1,5 @@ import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 13561202c..3c837becd 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -3,204 +3,24 @@ import re from .common import InfoExtractor -from ..networking import HEADRequest from ..utils import ( - InAdvancePagedList, clean_html, determine_ext, float_or_none, int_or_none, - join_nonempty, make_archive_id, mimetype2ext, orderedSet, parse_age_limit, remove_end, - smuggle_url, strip_jsonp, try_call, - unescapeHTML, unified_strdate, - unsmuggle_url, url_or_none, ) from ..utils.traversal import traverse_obj -class ORFTVthekIE(InfoExtractor): - IE_NAME = 'orf:tvthek' - IE_DESC = 'ORF TVthek' - _VALID_URL = r'(?Phttps?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P\d+))(/[^/]+/(?P\d+))?(?:$|[?#])' - - _TESTS = [{ - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079', - 'info_dict': { - 'id': '14121079', - }, - 'playlist_count': 11, - 'params': {'noplaylist': True} - }, { - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150', - 'info_dict': { - 'id': '14121079', - }, - 'playlist_count': 1, - 'params': {'playlist_items': '5'} - }, { - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150', - 'info_dict': { - 'id': '14121079', - }, - 'playlist': [{ - 'info_dict': { - 'id': '15083150', - 'ext': 'mp4', - 'description': 'md5:7be1c485425f5f255a5e4e4815e77d04', - 'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg', - 'title': 'Umfrage: Welches Tier ist Sebastian Kurz?', - } - }], - 'playlist_count': 1, - 'params': {'noplaylist': True, 'skip_download': 'm3u8'} - }, { - 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', - 'playlist': [{ - 'md5': '2942210346ed779588f428a92db88712', - 'info_dict': { - 'id': '8896777', - 'ext': 'mp4', - 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', - 'description': 'md5:c1272f0245537812d4e36419c207b67d', - 'duration': 2668, - 'upload_date': '20141208', - }, - }], - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'info_dict': { - 'id': '7982259', - 'ext': 'mp4', - 'title': 'Best of Ingrid Thurnher', - 'upload_date': '20140527', - 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', - }, - 'params': { - 'skip_download': True, # rtsp downloads - }, - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', - 'only_matching': True, - }, { - 'url': 'http://tvthek.orf.at/profile/Universum/35429', - 'only_matching': True, - }] - - def _pagefunc(self, url, data_jsb, n, *, image=None): - sd = data_jsb[n] - video_id, title = str(sd['id']), sd['title'] - formats = [] - for fd in sd['sources']: - src = url_or_none(fd.get('src')) - if not src: - continue - format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd) - ext = determine_ext(src) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest') - if any('/geoprotection' in f['url'] for f in m3u8_formats): - self.raise_geo_restricted() - formats.extend(m3u8_formats) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - src, video_id, f4m_id=format_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest')) - else: - formats.append({ - 'format_id': format_id, - 'url': src, - 'protocol': fd.get('protocol'), - }) - - # Check for geoblocking. - # There is a property is_geoprotection, but that's always false - geo_str = sd.get('geoprotection_string') - http_url = next( - (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])), - None) if geo_str else None - if http_url: - self._request_webpage( - HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking', - errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats') - - subtitles = {} - for sub in sd.get('subtitles', []): - sub_src = sub.get('src') - if not sub_src: - continue - subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({ - 'url': sub_src, - }) - - upload_date = unified_strdate(sd.get('created_date')) - - thumbnails = [] - preview = sd.get('preview_image_url') - if preview: - thumbnails.append({ - 'id': 'preview', - 'url': preview, - 'preference': 0, - }) - image = sd.get('image_full_url') or image - if image: - thumbnails.append({ - 'id': 'full', - 'url': image, - 'preference': 1, - }) - - yield { - 'id': video_id, - 'title': title, - 'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}), - 'formats': formats, - 'subtitles': subtitles, - 'description': sd.get('description'), - 'duration': int_or_none(sd.get('duration_in_seconds')), - 'upload_date': upload_date, - 'thumbnails': thumbnails, - } - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url) - playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url') - webpage = self._download_webpage(url, playlist_id) - - data_jsb = self._parse_json( - self._search_regex( - r']+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P.+?)\2', - webpage, 'playlist', group='json'), - playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - - if not self._yes_playlist(playlist_id, video_id, smuggled_data): - data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id] - - playlist_count = len(data_jsb) - image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None - - page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image) - return { - '_type': 'playlist', - 'entries': InAdvancePagedList(page_func, playlist_count, 1), - 'id': playlist_id, - } - - class ORFRadioIE(InfoExtractor): IE_NAME = 'orf:radio' @@ -583,6 +403,7 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 1706472362, 'upload_date': '20240128', + '_old_archive_ids': ['orftvthek 14210000'], } }, { 'url': 'https://on.orf.at/video/3220355', @@ -597,6 +418,7 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 52916400, 'upload_date': '19710905', + '_old_archive_ids': ['orftvthek 3220355'], } }] @@ -631,6 +453,7 @@ def _extract_video(self, video_id): 'id': video_id, 'formats': formats, 'subtitles': subtitles, + '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)], **traverse_obj(api_json, { 'age_limit': ('age_classification', {parse_age_limit}), 'duration': ('duration_second', {float_or_none}), diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py index 56203306f..3e969c846 100644 --- a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -3,13 +3,12 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, + clean_html, # remove_end, str_or_none, strip_or_none, unified_timestamp, - # urljoin, ) diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index 63c5fd68f..6b2596236 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -5,17 +5,13 @@ import random from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse -) - +from ..compat import compat_urllib_parse_urlparse, compat_urlparse from ..utils import ( - bug_reports_message, ExtractorError, + OnDemandPagedList, + bug_reports_message, get_first, int_or_none, - OnDemandPagedList, parse_qs, srt_subtitles_timecode, traverse_obj, diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 7e472a63e..3f19803c0 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -1,7 +1,7 @@ import itertools -from .common import InfoExtractor from .cbs import CBSBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 2bb2ea9f1..f6f5a5c3e 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -3,10 +3,11 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + US_RATINGS, ExtractorError, determine_ext, - int_or_none, float_or_none, + int_or_none, js_to_json, orderedSet, strip_jsonp, @@ -14,7 +15,6 @@ traverse_obj, unified_strdate, url_or_none, - US_RATINGS, ) diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index e27e5a7ba..086eaaf00 100644 --- a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..utils import ( qualities, - unified_timestamp, traverse_obj, + unified_timestamp, ) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 730b2393e..fb4d02562 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + OnDemandPagedList, format_field, int_or_none, parse_resolution, @@ -12,7 +13,6 @@ unified_timestamp, url_or_none, urljoin, - OnDemandPagedList, ) @@ -1470,11 +1470,15 @@ def _real_extract(self, url): title = video['name'] - formats = [] + formats, is_live = [], False files = video.get('files') or [] for playlist in (video.get('streamingPlaylists') or []): if not isinstance(playlist, dict): continue + if playlist_url := url_or_none(playlist.get('playlistUrl')): + is_live = True + formats.extend(self._extract_m3u8_formats( + playlist_url, video_id, fatal=False, live=True)) playlist_files = playlist.get('files') if not (playlist_files and isinstance(playlist_files, list)): continue @@ -1498,6 +1502,7 @@ def _real_extract(self, url): f['vcodec'] = 'none' else: f['fps'] = int_or_none(file_.get('fps')) + is_live = False formats.append(f) description = video.get('description') @@ -1555,6 +1560,7 @@ def channel_data(field, type_): 'categories': categories, 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, 'webpage_url': webpage_url, } diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index 3ae985da2..87d912d56 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -2,6 +2,8 @@ from ..compat import compat_urlparse from ..utils import ( ExtractorError, + clean_html, + get_element_by_class, parse_duration, parse_filesize, str_to_int, @@ -88,34 +90,22 @@ def _real_extract(self, url): if category_id not in ('1', '2', '21', '22', '23', '24', '25'): raise ExtractorError('The URL does not contain audio.', expected=True) - str_duration, str_filesize = self._search_regex( - r'サイズ:(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size', - group=(1, 2), default=(None, None)) - str_viewcount = self._search_regex(r'閲覧数:([0-9,]+)\s+', webpage, 'view count', fatal=False) - - uploader_id, uploader = self._search_regex( - r'([^<]+)さん<', webpage, 'uploader', - group=(1, 2), default=(None, None)) - content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') - create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') - - player_webpage = self._download_webpage( - f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', - video_id, note='Downloading player webpage') + def extract_info(name, description): + return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*

', webpage, description, default=None) return { 'id': video_id, - 'title': self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'(?s)(.+?)

\s*[^/]+)/prefid/)?(?P[\w-]+)''' - _EMBED_REGEX = [r']+src=["\'](?P(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)'] + _EMBED_REGEX = [r']+src=["\'](?P(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)'] _TESTS = [ { - 'url': 'http://player.piksel.com/v/ums2867l', + 'url': 'http://player.piksel.tech/v/ums2867l', 'md5': '34e34c8d89dc2559976a6079db531e85', 'info_dict': { 'id': 'ums2867l', 'ext': 'mp4', 'title': 'GX-005 with Caption', 'timestamp': 1481335659, - 'upload_date': '20161210' + 'upload_date': '20161210', + 'description': '', + 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480', } }, { # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al - 'url': 'https://player.piksel.com/v/v80kqp41', + 'url': 'https://player.piksel.tech/v/v80kqp41', 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', 'info_dict': { 'id': 'v80kqp41', @@ -55,7 +57,8 @@ class PikselIE(InfoExtractor): 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.', 'timestamp': 1486171129, - 'upload_date': '20170204' + 'upload_date': '20170204', + 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360', } }, { @@ -65,7 +68,7 @@ class PikselIE(InfoExtractor): } ] - def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True): + def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True): url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5') response = traverse_obj( self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {} @@ -146,7 +149,7 @@ def process_asset_files(asset_files): smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) if smil_url: - transform_source = None + transform_source = lambda x: x.replace('src="/', 'src="') if ref_id == 'nhkworld': # TODO: figure out if this is something to be fixed in urljoin, # _parse_smil_formats or keep it here diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index d67f6005c..c72a3876c 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -1,11 +1,11 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_qs, - xpath_text, qualities, + xpath_text, ) diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 166b98c4a..d978c080b 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -4,8 +4,8 @@ compat_str, ) from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, str_or_none, try_get, diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index c418f88cb..a01b42290 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_urllib_parse_urlencode, + compat_urlparse, ) from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 809b65608..60c9efffe 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -10,8 +10,8 @@ compat_urlparse, ) from ..utils import ( - dict_get, ExtractorError, + dict_get, float_or_none, int_or_none, parse_duration, diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index 1cebb365e..ecf2132b4 100644 --- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, try_get, url_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py index 51a9cf38f..d711d3e67 100644 --- a/yt_dlp/extractor/pornflip.py +++ b/yt_dlp/extractor/pornflip.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601 -) +from ..utils import int_or_none, parse_duration, parse_iso8601 class PornFlipIE(InfoExtractor): diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py index 2e51b4f6b..b8e8701a8 100644 --- a/yt_dlp/extractor/pornovoisines.py +++ b/yt_dlp/extractor/pornovoisines.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py index 5bb183270..338794ed5 100644 --- a/yt_dlp/extractor/prx.py +++ b/yt_dlp/extractor/prx.py @@ -1,14 +1,15 @@ import itertools + from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( - urljoin, - traverse_obj, + clean_html, int_or_none, mimetype2ext, - clean_html, - url_or_none, - unified_timestamp, str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index 4b8e5e90d..fc4c29e95 100644 --- a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, parse_resolution, str_or_none, try_get, diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py index aa690d492..cb00de2d5 100644 --- a/yt_dlp/extractor/qingting.py +++ b/yt_dlp/extractor/qingting.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import traverse_obj diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 92858259a..90141e63b 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..utils import ( - clean_html, ExtractorError, + clean_html, strip_jsonp, unescapeHTML, ) diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py index 1a5a6355a..4a09dcdfc 100644 --- a/yt_dlp/extractor/radiocanada.py +++ b/yt_dlp/extractor/radiocanada.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py index 38f8cf786..0c219778f 100644 --- a/yt_dlp/extractor/radiocomercial.py +++ b/yt_dlp/extractor/radiocomercial.py @@ -14,7 +14,7 @@ try_call, unified_strdate, update_url, - urljoin + urljoin, ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py index 67520172e..632c8c281 100644 --- a/yt_dlp/extractor/radiozet.py +++ b/yt_dlp/extractor/radiozet.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - traverse_obj, strip_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py index 3c00183be..325e278fc 100644 --- a/yt_dlp/extractor/radlive.py +++ b/yt_dlp/extractor/radlive.py @@ -1,13 +1,13 @@ import json +from .common import InfoExtractor from ..utils import ( ExtractorError, format_field, traverse_obj, try_get, - unified_timestamp + unified_timestamp, ) -from .common import InfoExtractor class RadLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index c1fc65c81..c2e7a6fb8 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -3,11 +3,11 @@ from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( + ExtractorError, + GeoRestrictedError, clean_html, determine_ext, - ExtractorError, filter_dict, - GeoRestrictedError, int_or_none, join_nonempty, parse_duration, diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py index 54f194cbd..5f2d0c103 100644 --- a/yt_dlp/extractor/rbgtum.py +++ b/yt_dlp/extractor/rbgtum.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError +from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj class RbgTumIE(InfoExtractor): diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index 6a7c7f399..9c382e257 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -5,11 +5,11 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - dict_get, ExtractorError, + dict_get, strip_or_none, traverse_obj, - try_get + try_get, ) diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py index 1a1c6634e..cc76b898a 100644 --- a/yt_dlp/extractor/rds.py +++ b/yt_dlp/extractor/rds.py @@ -1,10 +1,10 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + js_to_json, parse_duration, parse_iso8601, - js_to_json, ) -from ..compat import compat_str class RDSIE(InfoExtractor): diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index d1de2490f..fac51b9ef 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - float_or_none, ExtractorError, + float_or_none, ) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 62f669f35..bc3e5f7ee 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -5,11 +5,13 @@ ExtractorError, float_or_none, int_or_none, + parse_qs, traverse_obj, try_get, unescapeHTML, - urlencode_postdata, + update_url_query, url_or_none, + urlencode_postdata, ) @@ -76,7 +78,7 @@ class RedditIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'comment_count': int, - 'age_limit': 0, + 'age_limit': 18, 'channel_id': 'u_creepyt0es', }, 'params': { @@ -150,6 +152,51 @@ class RedditIE(InfoExtractor): 'like_count': int, }, 'skip': 'Requires account that has opted-in to the GenZedong subreddit', + }, { + # subtitles in HLS manifest + 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/', + 'info_dict': { + 'id': 'a2mdj5d57qyc1', + 'ext': 'mp4', + 'display_id': '1cl9h0u', + 'title': 'The insurance claim will be interesting', + 'uploader': 'darrenpauli', + 'channel_id': 'Unexpected', + 'duration': 53, + 'upload_date': '20240506', + 'timestamp': 1714966382, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + 'subtitles': {'en': 'mincount:1'}, + }, + 'params': { + 'skip_download': True, + }, + }, { + # subtitles from caption-url + 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/', + 'info_dict': { + 'id': 'xbmj4t3igy1d1', + 'ext': 'mp4', + 'display_id': '1cxwzso', + 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'', + 'uploader': 'Woodstovia', + 'channel_id': 'soccer', + 'duration': 30, + 'upload_date': '20240522', + 'timestamp': 1716373798, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + 'subtitles': {'en': 'mincount:1'}, + }, + 'params': { + 'skip_download': True, + 'writesubtitles': True, + }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, @@ -197,6 +244,12 @@ def _perform_login(self, username, password): elif not traverse_obj(login, ('json', 'data', 'cookie', {str})): raise ExtractorError('Unable to login, no cookie was returned') + def _get_subtitles(self, video_id): + # Fallback if there were no subtitles provided by DASH or HLS manifests + caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt' + if self._is_valid_url(caption_url, video_id, item='subtitles'): + return {'en': [{'url': caption_url}]} + def _real_extract(self, url): host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') @@ -307,6 +360,10 @@ def add_thumbnail(src): dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + qs = traverse_obj(parse_qs(hls_playlist_url), { + 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}), + }) + hls_playlist_url = update_url_query(hls_playlist_url, qs) formats = [{ 'url': unescapeHTML(reddit_video['fallback_url']), @@ -332,7 +389,7 @@ def add_thumbnail(src): 'id': video_id, 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles, + 'subtitles': subtitles or self.extract_subtitles(video_id), 'duration': int_or_none(reddit_video.get('duration')), } diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index f9453202b..d0546bbfa 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -5,10 +5,10 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + OnDemandPagedList, int_or_none, qualities, try_get, - OnDemandPagedList, ) diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 965abbee8..14ed0edab 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, merge_dicts, str_to_int, diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py index 0a8f13b9f..9c9bac6af 100644 --- a/yt_dlp/extractor/reuters.py +++ b/yt_dlp/extractor/reuters.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - js_to_json, int_or_none, + js_to_json, unescapeHTML, ) diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py index 8d29b302b..bc59ed07e 100644 --- a/yt_dlp/extractor/rmcdecouverte.py +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE +from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urlparse, diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py index 7ba80d4ba..729804d23 100644 --- a/yt_dlp/extractor/rte.py +++ b/yt_dlp/extractor/rte.py @@ -3,13 +3,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, float_or_none, parse_iso8601, str_or_none, try_get, unescapeHTML, url_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py index 5928a207a..ec78d0a66 100644 --- a/yt_dlp/extractor/rtp.py +++ b/yt_dlp/extractor/rtp.py @@ -1,9 +1,10 @@ +import base64 +import json +import re +import urllib.parse + from .common import InfoExtractor from ..utils import js_to_json -import re -import json -import urllib.parse -import base64 class RTPIE(InfoExtractor): diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py index 741c47262..e7dcd5fd6 100644 --- a/yt_dlp/extractor/rtvcplay.py +++ b/yt_dlp/extractor/rtvcplay.py @@ -1,16 +1,17 @@ import re -from .common import InfoExtractor, ExtractorError +from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - int_or_none, float_or_none, + int_or_none, js_to_json, mimetype2ext, traverse_obj, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py index a84a78da8..defb8d741 100644 --- a/yt_dlp/extractor/rtvs.py +++ b/yt_dlp/extractor/rtvs.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( parse_duration, traverse_obj, diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 287824d08..eb12f32fa 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -5,8 +5,8 @@ compat_str, ) from ..utils import ( - determine_ext, bool_or_none, + determine_ext, int_or_none, parse_qs, try_get, diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index d7f9a7337..726d49111 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -1,11 +1,7 @@ import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - str_to_int -) +from ..utils import ExtractorError, int_or_none, str_to_int class RUTVIE(InfoExtractor): diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 33f6652df..dc61387be 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, find_xpath_attr, int_or_none, traverse_obj, diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index 8d322d710..17dff0afa 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -2,7 +2,6 @@ import re from .common import InfoExtractor - from ..compat import ( compat_parse_qs, compat_urlparse, diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index 3912f7786..85d51cd59 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -1,8 +1,8 @@ -import json import hashlib +import json -from .aws import AWSIE from .anvato import AnvatoIE +from .aws import AWSIE from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index 9c2ca8c51..fc91d60e1 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - decode_packed_codes, ExtractorError, + decode_packed_codes, urlencode_postdata, ) diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index 1ecea71fc..99fcf51f1 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -2,12 +2,12 @@ from .common import InfoExtractor from ..utils import ( - float_or_none, - parse_iso8601, - update_url_query, - int_or_none, determine_protocol, + float_or_none, + int_or_none, + parse_iso8601, unescapeHTML, + update_url_query, ) diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index 79e888583..b31d566df 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -4,11 +4,11 @@ compat_urllib_parse_urlparse, ) from ..utils import ( - urljoin, int_or_none, parse_codecs, parse_qs, try_get, + urljoin, ) diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index d509e8879..89aee2728 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -5,9 +5,9 @@ from .aws import AWSIE from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, InAdvancePagedList, + clean_html, int_or_none, parse_iso8601, str_or_none, diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index ec9938b8c..cca86ed6c 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -4,8 +4,8 @@ compat_b64decode, ) from ..utils import ( - bytes_to_intlist, ExtractorError, + bytes_to_intlist, intlist_to_bytes, unified_strdate, ) diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index ef93b9276..44619a16c 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -6,8 +6,8 @@ determine_ext, int_or_none, parse_qs, - try_get, qualities, + try_get, ) diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py index 867782778..234703cf7 100644 --- a/yt_dlp/extractor/skynewsarabia.py +++ b/yt_dlp/extractor/skynewsarabia.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_iso8601, parse_duration, + parse_iso8601, ) diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py index c0ff4f9aa..a41ad303a 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -8,13 +8,13 @@ ) from ..utils import ( ExtractorError, - int_or_none, float_or_none, - url_or_none, - unified_timestamp, - try_get, - urljoin, + int_or_none, traverse_obj, + try_get, + unified_timestamp, + url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 493eea2a6..773ddd344 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - try_get, - unified_timestamp -) +from ..utils import try_get, unified_timestamp class SovietsClosetBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index 43da34a32..c73f7971d 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, merge_dicts, parse_duration, parse_resolution, diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py index a98584a27..bdb8ef496 100644 --- a/yt_dlp/extractor/springboardplatform.py +++ b/yt_dlp/extractor/springboardplatform.py @@ -4,11 +4,11 @@ from ..utils import ( ExtractorError, int_or_none, - xpath_attr, - xpath_text, - xpath_element, unescapeHTML, unified_timestamp, + xpath_attr, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py index bb6e8f1ea..312a4fde0 100644 --- a/yt_dlp/extractor/startv.py +++ b/yt_dlp/extractor/startv.py @@ -3,10 +3,10 @@ compat_str, ) from ..utils import ( - clean_html, ExtractorError, - traverse_obj, + clean_html, int_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py index 2fd200f87..46a15e6a1 100644 --- a/yt_dlp/extractor/stitcher.py +++ b/yt_dlp/extractor/stitcher.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, clean_podcast_url, - ExtractorError, int_or_none, str_or_none, try_get, diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 566f77782..20a70a7bc 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + OnDemandPagedList, format_field, int_or_none, - OnDemandPagedList, smuggle_url, ) diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 462861e0e..c303ac53a 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -3,8 +3,8 @@ ExtractorError, float_or_none, int_or_none, - try_get, parse_codecs, + try_get, ) diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index b9523c865..a847925e4 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -3,7 +3,7 @@ ExtractorError, UserNotLive, lowercase_escape, - traverse_obj + traverse_obj, ) diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 708873a95..501156e51 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, - int_or_none, - qualities, determine_ext, + int_or_none, + parse_duration, + qualities, ) diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index bd2d73842..29e5e573f 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -1,7 +1,7 @@ from .adobepass import AdobePassIE from ..utils import ( - update_url_query, smuggle_url, + update_url_query, ) diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py new file mode 100644 index 000000000..56f2f0ef4 --- /dev/null +++ b/yt_dlp/extractor/taptap.py @@ -0,0 +1,275 @@ +import re +import uuid + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + join_nonempty, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class TapTapBaseIE(InfoExtractor): + _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC' + _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get' + _INFO_API = None + _INFO_QUERY_KEY = 'id' + _DATA_PATH = None + _ID_PATH = None + _META_PATH = None + + def _get_api(self, url, video_id, query, **kwargs): + query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())} + return self._download_json(url, video_id, query=query, **kwargs)['data'] + + def _extract_video(self, video_id): + video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0] + + # h265 playlist contains both h265 and h264 formats + video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any)) + formats = self._extract_m3u8_formats(video_url, video_id, fatal=False) + for format in formats: + if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')): + format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_') + + return { + 'id': str(video_id), + 'formats': formats, + **traverse_obj(video_data, ({ + 'duration': ('info', 'duration', {int_or_none}), + 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}), + }), get_all=False) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + query = {self._INFO_QUERY_KEY: video_id} + + data = traverse_obj( + self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH) + + metainfo = traverse_obj(data, self._META_PATH) + entries = [{ + **metainfo, + **self._extract_video(id) + } for id in set(traverse_obj(data, self._ID_PATH))] + + return self.playlist_result(entries, **metainfo, id=video_id) + + +class TapTapMomentIE(TapTapBaseIE): + _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P\d+)' + _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail' + _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id') + _META_PATH = ('moment', { + 'timestamp': ('created_time', {int_or_none}), + 'modified_timestamp': ('edited_time', {int_or_none}), + 'uploader': ('author', 'user', 'name', {str}), + 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}), + 'title': ('topic', 'title', {str}), + 'description': ('topic', 'summary', {str}), + }) + _TESTS = [{ + 'url': 'https://www.taptap.cn/moment/194618230982052443', + 'info_dict': { + 'id': '194618230982052443', + 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', + 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', + 'timestamp': 1633453402, + 'upload_date': '20211005', + 'modified_timestamp': 1633453402, + 'modified_date': '20211005', + 'uploader': '乌酱', + 'uploader_id': '532896', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2202584', + 'ext': 'mp4', + 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', + 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', + 'duration': 66, + 'timestamp': 1633453402, + 'upload_date': '20211005', + 'modified_timestamp': 1633453402, + 'modified_date': '20211005', + 'uploader': '乌酱', + 'uploader_id': '532896', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.taptap.cn/moment/521630629209573493', + 'info_dict': { + 'id': '521630629209573493', + 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', + 'description': 'md5:2c81245da864428c904d53ae4ad2182b', + 'timestamp': 1711425600, + 'upload_date': '20240326', + 'modified_timestamp': 1711425600, + 'modified_date': '20240326', + 'uploader': '崩坏:星穹铁道', + 'uploader_id': '414732580', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '4006511', + 'ext': 'mp4', + 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', + 'description': 'md5:2c81245da864428c904d53ae4ad2182b', + 'duration': 173, + 'timestamp': 1711425600, + 'upload_date': '20240326', + 'modified_timestamp': 1711425600, + 'modified_date': '20240326', + 'uploader': '崩坏:星穹铁道', + 'uploader_id': '414732580', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.taptap.cn/moment/540493587511511299', + 'playlist_count': 2, + 'info_dict': { + 'id': '540493587511511299', + 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!', + 'description': 'md5:d60842350e686ddb242291ddfb8e39c9', + 'timestamp': 1715920200, + 'upload_date': '20240517', + 'modified_timestamp': 1715942225, + 'modified_date': '20240517', + 'uploader': 'TapTap 编辑', + 'uploader_id': '7159244', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapAppIE(TapTapBaseIE): + _VALID_URL = r'https?://www\.taptap\.cn/app/(?P\d+)' + _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail' + _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') + _META_PATH = { + 'title': ('title', {str}), + 'description': ('description', 'text', {str}, {clean_html}), + } + _TESTS = [{ + 'url': 'https://www.taptap.cn/app/168332', + 'info_dict': { + 'id': '168332', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + }, + 'playlist_count': 2, + 'playlist': [{ + 'info_dict': { + 'id': '4058443', + 'ext': 'mp4', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + 'duration': 26, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }, { + 'info_dict': { + 'id': '4058462', + 'ext': 'mp4', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + 'duration': 295, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapIntlBase(TapTapBaseIE): + _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0' + _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get' + + +class TapTapAppIntlIE(TapTapIntlBase): + _VALID_URL = r'https?://www\.taptap\.io/app/(?P\d+)' + _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail' + _DATA_PATH = 'app' + _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') + _META_PATH = { + 'title': ('title', {str}), + 'description': ('description', 'text', {str}, {clean_html}), + } + _TESTS = [{ + 'url': 'https://www.taptap.io/app/233287', + 'info_dict': { + 'id': '233287', + 'title': '《虹彩六號 M》', + 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2149708997', + 'ext': 'mp4', + 'title': '《虹彩六號 M》', + 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', + 'duration': 78, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapPostIntlIE(TapTapIntlBase): + _VALID_URL = r'https?://www\.taptap\.io/post/(?P\d+)' + _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail' + _INFO_QUERY_KEY = 'id_str' + _DATA_PATH = 'post' + _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id') + _META_PATH = { + 'timestamp': ('published_time', {int_or_none}), + 'modified_timestamp': ('edited_time', {int_or_none}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'id', {int}, {str_or_none}), + 'title': ('title', {str}), + 'description': ('list_fields', 'summary', {str}), + } + _TESTS = [{ + 'url': 'https://www.taptap.io/post/571785', + 'info_dict': { + 'id': '571785', + 'title': 'Arknights x Rainbow Six Siege | Event PV', + 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', + 'timestamp': 1614664951, + 'upload_date': '20210302', + 'modified_timestamp': 1614664951, + 'modified_date': '20210302', + 'uploader': 'TapTap Editor', + 'uploader_id': '80224473', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2149491903', + 'ext': 'mp4', + 'title': 'Arknights x Rainbow Six Siege | Event PV', + 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', + 'duration': 122, + 'timestamp': 1614664951, + 'upload_date': '20210302', + 'modified_timestamp': 1614664951, + 'modified_date': '20210302', + 'uploader': 'TapTap Editor', + 'uploader_id': '80224473', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 808c6c73d..4e178593f 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -2,8 +2,8 @@ from .turner import TurnerBaseIE from ..compat import ( - compat_urllib_parse_urlparse, compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 5eac9aa3f..778fa1263 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from .wistia import WistiaIE from ..utils import ( - clean_html, ExtractorError, - int_or_none, + clean_html, get_element_by_class, + int_or_none, strip_or_none, urlencode_postdata, urljoin, diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py index 90a976297..740240993 100644 --- a/yt_dlp/extractor/teachertube.py +++ b/yt_dlp/extractor/teachertube.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, qualities, ) diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py index d32f81262..3fb899cac 100644 --- a/yt_dlp/extractor/teamcoco.py +++ b/yt_dlp/extractor/teamcoco.py @@ -13,8 +13,8 @@ parse_qs, traverse_obj, unified_timestamp, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index dd802db5b..ba25cdcf6 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, float_or_none, get_element_by_class, get_element_by_id, diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py index c28a15498..0969bbb03 100644 --- a/yt_dlp/extractor/ted.py +++ b/yt_dlp/extractor/ted.py @@ -2,14 +2,13 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, + parse_duration, str_to_int, try_get, - url_or_none, unified_strdate, - parse_duration, + url_or_none, ) diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index 212af3785..1705c2d55 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( + determine_ext, js_to_json, qualities, - determine_ext, ) diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index 72f67e402..a45537541 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,89 +1,77 @@ -from .dplay import DPlayIE -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - extract_attributes, -) +import functools + +from .dplay import DiscoveryPlusBaseIE +from ..utils import join_nonempty +from ..utils.traversal import traverse_obj -class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P[^/?#&]+)' - _GEO_COUNTRIES = ['DE'] +class Tele5IE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P[\w-]+)/(?P[\w-]+)(?:/(?P[\w-]+))?' _TESTS = [{ - 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', + # slug_a and slug_b + 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane', 'info_dict': { - 'id': '1549416', + 'id': '6852024', 'ext': 'mp4', - 'upload_date': '20180814', - 'timestamp': 1534290623, - 'title': 'Pandorum', + 'title': 'Quarantäne', + 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675', + 'episode': 'Episode 73', + 'episode_number': 73, + 'season': 'Season 4', + 'season_number': 4, + 'series': 'Stargate Atlantis', + 'upload_date': '20240525', + 'timestamp': 1716643200, + 'duration': 2503.2, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available: "404 Seite nicht gefunden"', }, { - # jwplatform, nexx unavailable - 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', + # only slug_a + 'url': 'https://tele5.de/mediathek/inside-out', 'info_dict': { - 'id': 'WJuiOlUp', + 'id': '6819502', 'ext': 'mp4', - 'upload_date': '20200603', - 'timestamp': 1591214400, - 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters', - 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97', + 'title': 'Inside out', + 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd', + 'series': 'Inside out', + 'upload_date': '20240523', + 'timestamp': 1716494400, + 'duration': 5343.4, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available, redirects to Filme page', }, { - 'url': 'https://tele5.de/mediathek/angel-of-mine/', + # playlist + 'url': 'https://tele5.de/mediathek/schlefaz', 'info_dict': { - 'id': '1252360', - 'ext': 'mp4', - 'upload_date': '20220109', - 'timestamp': 1641762000, - 'title': 'Angel of Mine', - 'description': 'md5:a72546a175e1286eb3251843a52d1ad7', + 'id': 'mediathek-schlefaz', }, - 'params': { - 'format': 'bestvideo', - }, - }, { - 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/anders-ist-sevda/', - 'only_matching': True, + 'playlist_mincount': 3, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_element = self._search_regex(r'(]+?>)', webpage, 'video player') - player_info = extract_attributes(player_element) - asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', )) - endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname - source_type = player_info.get('sourcetype') - if source_type: - endpoint = '%s-%s' % (source_type, endpoint) - try: - return self._get_disco_api_info(url, asset_id, endpoint, realm, country) - except ExtractorError as e: - if getattr(e, 'message', '') == 'Missing deviceId in context': - self.report_drm(video_id) - raise + parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b') + playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-') + + query = {'environment': 'tele5', 'v': '2'} + if not slug_b: + endpoint = f'page/{slug_a}' + query['parent_slug'] = parent_slug + else: + endpoint = f'videos/{slug_b}' + query['filter[show.slug]'] = slug_a + cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', playlist_id, query=query) + + return self.playlist_result(map( + functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'), + traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 5fdcddd8b..380c84d98 100644 --- a/yt_dlp/extractor/telewebion.py +++ b/yt_dlp/extractor/telewebion.py @@ -1,4 +1,5 @@ from __future__ import annotations + import functools import json import textwrap diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py index 9318d6f9a..71e54eb0c 100644 --- a/yt_dlp/extractor/tempo.py +++ b/yt_dlp/extractor/tempo.py @@ -5,7 +5,7 @@ int_or_none, parse_iso8601, traverse_obj, - try_call + try_call, ) diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index 6618ea4e6..ae2cb483f 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -8,8 +8,8 @@ from ..aes import aes_cbc_encrypt_bytes from ..utils import ( ExtractorError, - float_or_none, determine_ext, + float_or_none, int_or_none, js_to_json, traverse_obj, diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py index a231eccf4..fb6407715 100644 --- a/yt_dlp/extractor/theguardian.py +++ b/yt_dlp/extractor/theguardian.py @@ -10,7 +10,7 @@ parse_qs, traverse_obj, unified_strdate, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py index a991a4dfd..99f0d42ef 100644 --- a/yt_dlp/extractor/theintercept.py +++ b/yt_dlp/extractor/theintercept.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_iso8601, - int_or_none, ExtractorError, + int_or_none, + parse_iso8601, ) diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index 9160f5ec6..eeb33a660 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -1,29 +1,27 @@ -import re -import time -import hmac import binascii import hashlib +import hmac +import re +import time - -from .once import OnceIE from .adobepass import AdobePassIE -from ..networking import Request +from .once import OnceIE +from ..networking import HEADRequest, Request from ..utils import ( - determine_ext, ExtractorError, + determine_ext, + find_xpath_attr, float_or_none, int_or_none, - parse_qs, - unsmuggle_url, - update_url_query, - xpath_with_ns, mimetype2ext, - find_xpath_attr, + parse_qs, traverse_obj, + unsmuggle_url, update_url, + update_url_query, urlhandle_detect_ext, + xpath_with_ns, ) -from ..networking import HEADRequest default_ns = 'http://www.w3.org/2005/SMIL21/Language' _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py index 9d3368ed7..04b083811 100644 --- a/yt_dlp/extractor/thisvid.py +++ b/yt_dlp/extractor/thisvid.py @@ -134,7 +134,7 @@ def _make_playlist_result(self, url): title = re.split( r'(?i)\s*\|\s*ThisVid\.com\s*$', self._og_search_title(webpage, default=None) - or self._html_search_regex(r'(?s)]*>(.+?)]*>(.+?)[\w\.-]+)/?(?:$|[#?])' - _WORKING = False + _VALID_URL = r'(?:tiktokuser:|https?://(?:www\.)?tiktok\.com/@)(?P[\w.-]+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://tiktok.com/@corgibobaa?lang=en', 'playlist_mincount': 45, 'info_dict': { - 'id': '6935371178089399301', + 'id': 'MS4wLjABAAAAepiJKgwWhulvCpSuUVsp7sgVVsFJbbNaLeQ6OQ0oAJERGDUIXhb2yxxHZedsItgT', 'title': 'corgibobaa', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@6820838815978423302', 'playlist_mincount': 5, 'info_dict': { - 'id': '6820838815978423302', + 'id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'title': '6820838815978423302', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, 'info_dict': { - 'id': '79005827461758976', + 'id': 'MS4wLjABAAAAiKfaDWeCsT3IHwY77zqWGtVRIy9v4ws1HbVi7auP1Vx7dJysU_hc5yRiGywojRD6', 'title': 'meme', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] + }, { + 'url': 'tiktokuser:MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', + 'playlist_mincount': 31, + 'info_dict': { + 'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', + }, }] + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0' + _API_BASE_URL = 'https://www.tiktok.com/api/creator/item_list/' - r''' # TODO: Fix by adding _signature to api_url - def _entries(self, webpage, user_id, username): - secuid = self._search_regex(r'\"secUid\":\"(?P[^\"]+)', webpage, username) - verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id') - if not verifyfp_cookie: - raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True) - api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor=' - cursor = '0' - for page in itertools.count(): - data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page) - for video in data_json.get('itemList', []): - video_id = video['id'] - video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}' - yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc'))) - if not data_json.get('hasMore'): - break - cursor = data_json['cursor'] - ''' - - def _video_entries_api(self, webpage, user_id, username): - query = { - 'user_id': user_id, - 'count': 21, - 'max_cursor': 0, - 'min_cursor': 0, - 'retry_type': 'no_retry', - 'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + def _build_web_query(self, sec_uid, cursor): + return { + 'aid': '1988', + 'app_language': 'en', + 'app_name': 'tiktok_web', + 'browser_language': 'en-US', + 'browser_name': 'Mozilla', + 'browser_online': 'true', + 'browser_platform': 'Win32', + 'browser_version': '5.0 (Windows)', + 'channel': 'tiktok_web', + 'cookie_enabled': 'true', + 'count': '15', + 'cursor': cursor, + 'device_id': self._DEVICE_ID, + 'device_platform': 'web_pc', + 'focus_state': 'true', + 'from_page': 'user', + 'history_len': '2', + 'is_fullscreen': 'false', + 'is_page_visible': 'true', + 'language': 'en', + 'os': 'windows', + 'priority_region': '', + 'referer': '', + 'region': 'US', + 'screen_height': '1080', + 'screen_width': '1920', + 'secUid': sec_uid, + 'type': '1', # pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest + 'tz_name': 'UTC', + 'verifyFp': f'verify_{"".join(random.choices(string.hexdigits, k=7))}', + 'webcast_language': 'en', } - for page in itertools.count(1): - for retry in self.RetryManager(): - try: - post_list = self._call_api( - 'aweme/post', query, username, note=f'Downloading user video list page {page}', - errnote='Unable to download user video list') - except ExtractorError as e: - if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: - retry.error = e - continue - raise - yield from post_list.get('aweme_list', []) - if not post_list.get('has_more'): - break - query['max_cursor'] = post_list['max_cursor'] + def _entries(self, sec_uid, user_name): + display_id = user_name or sec_uid + seen_ids = set() - def _entries_api(self, user_id, videos): - for video in videos: - yield { - **self._parse_aweme_video_app(video), - 'extractor_key': TikTokIE.ie_key(), - 'extractor': 'TikTok', - 'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}', - } + cursor = int(time.time() * 1E3) + for page in itertools.count(1): + response = self._download_json( + self._API_BASE_URL, display_id, f'Downloading page {page}', + query=self._build_web_query(sec_uid, cursor), headers={'User-Agent': self._USER_AGENT}) + + for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): + video_id = video['id'] + if video_id in seen_ids: + continue + seen_ids.add(video_id) + webpage_url = self._create_url(display_id, video_id) + yield self.url_result( + webpage_url, TikTokIE, + **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True)) + + old_cursor = cursor + cursor = traverse_obj( + response, ('itemList', -1, 'createTime', {lambda x: int(x * 1E3)})) + if not cursor or old_cursor == cursor: + # User may not have posted within this ~1 week lookback, so manually adjust cursor + cursor = old_cursor - 7 * 86_400_000 + # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed + if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'): + break + + def _get_sec_uid(self, user_url, user_name, msg): + webpage = self._download_webpage( + user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'}, + note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or '' + return (traverse_obj(self._get_universal_data(webpage, user_name), + ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) + or traverse_obj(self._get_sigi_state(webpage, user_name), + ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}), + ('UserModule', 'users', ..., 'secUid', {str}, any))) def _real_extract(self, url): - user_name = self._match_id(url) - webpage = self._download_webpage(url, user_name, headers={ - 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' - }) - user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name + user_name, sec_uid = self._match_id(url), None + if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name): + user_name, sec_uid = None, mobj.group(0) + else: + sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user') + or self._get_sec_uid(self._UPLOADER_URL_FORMAT % f'{user_name}/live', user_name, 'live')) - videos = LazyList(self._video_entries_api(webpage, user_id, user_name)) - thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0)) + if not sec_uid: + webpage = self._download_webpage( + f'https://www.tiktok.com/embed/@{user_name}', user_name, + note='Downloading user embed page', fatal=False) or '' + data = traverse_obj(self._search_json( + r']+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>', + webpage, 'data', user_name, default={}), + ('source', 'data', f'/embed/@{user_name}', {dict})) - return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail) + for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})): + webpage_url = self._create_url(user_name, aweme_id) + video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False) + sec_uid = self._parse_aweme_video_web( + video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id') + if sec_uid: + break + + if not sec_uid: + raise ExtractorError( + 'Unable to extract secondary user ID. If you are able to get the channel_id ' + 'from a video posted by this user, try using "tiktokuser:channel_id" as the ' + 'input URL (replacing `channel_id` with its actual value)', expected=True) + + return self.playlist_result(self._entries(sec_uid, user_name), sec_uid, user_name) class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -1083,6 +1121,64 @@ def _real_extract(self, url): return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id) +class TikTokCollectionIE(TikTokBaseIE): + IE_NAME = 'tiktok:collection' + _VALID_URL = r'https?://www\.tiktok\.com/@(?P[\w.-]+)/collection/(?P[^/?#]+)-(?P<id>\d+)/?(?:[?#]|$)' + _TESTS = [{ + # playlist should have exactly 9 videos + 'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462', + 'info_dict': { + 'id': '7371330159376370462', + 'title': 'imanoreotwe-count-test' + }, + 'playlist_count': 9 + }, { + # tests returning multiple pages of a large collection + 'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875', + 'info_dict': { + 'id': '7111887189571160875', + 'title': 'imanoreotwe-%F0%9F%98%82' + }, + 'playlist_mincount': 100 + }] + _API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/' + _PAGE_COUNT = 30 + + def _build_web_query(self, collection_id, cursor): + return { + 'aid': '1988', + 'collectionId': collection_id, + 'count': self._PAGE_COUNT, + 'cursor': cursor, + 'sourceType': '113', + } + + def _entries(self, collection_id): + cursor = 0 + for page in itertools.count(1): + response = self._download_json( + self._API_BASE_URL, collection_id, f'Downloading page {page}', + query=self._build_web_query(collection_id, cursor)) + + for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): + video_id = video['id'] + author = traverse_obj(video, ('author', ('uniqueId', 'secUid', 'id'), {str}, any)) or '_' + webpage_url = self._create_url(author, video_id) + yield self.url_result( + webpage_url, TikTokIE, + **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True)) + + if not traverse_obj(response, 'hasMore'): + break + cursor += self._PAGE_COUNT + + def _real_extract(self, url): + collection_id, title, user_name = self._match_valid_url(url).group('id', 'title', 'user_id') + + return self.playlist_result( + self._entries(collection_id), collection_id, '-'.join((user_name, title))) + + class DouyinIE(TikTokBaseIE): _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)' _TESTS = [{ @@ -1098,7 +1194,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 19, 'timestamp': 1620905839, 'upload_date': '20210513', @@ -1123,7 +1218,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel': '杨超越工作室', - 'creators': ['杨超越工作室'], 'duration': 42, 'timestamp': 1625739481, 'upload_date': '20210708', @@ -1148,7 +1242,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 17, 'timestamp': 1619098692, 'upload_date': '20210422', @@ -1190,7 +1283,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 15, 'timestamp': 1621261163, 'upload_date': '20210517', diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index aa7ee6c48..ccb2ef816 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -1,6 +1,7 @@ -from .common import InfoExtractor import re +from .common import InfoExtractor + class ToypicsIE(InfoExtractor): _WORKING = False diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py index 56e51fea8..3bdeedd43 100644 --- a/yt_dlp/extractor/triller.py +++ b/yt_dlp/extractor/triller.py @@ -14,8 +14,8 @@ traverse_obj, unified_timestamp, url_basename, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py index 86f0990e8..efedac180 100644 --- a/yt_dlp/extractor/trueid.py +++ b/yt_dlp/extractor/trueid.py @@ -1,13 +1,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_age_limit, traverse_obj, unified_timestamp, - url_or_none + url_or_none, ) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index a26bdcaae..f2d0c5901 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -3,7 +3,7 @@ ExtractorError, int_or_none, traverse_obj, - urlencode_postdata + urlencode_postdata, ) diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 630d84bdc..b27db87bf 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -3,17 +3,17 @@ from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( - fix_xml_ampersands, - xpath_text, - int_or_none, - determine_ext, - float_or_none, - parse_duration, - xpath_attr, - update_url_query, ExtractorError, + determine_ext, + fix_xml_ampersands, + float_or_none, + int_or_none, + parse_duration, strip_or_none, + update_url_query, url_or_none, + xpath_attr, + xpath_text, ) diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 7756aa3f5..9b19e7995 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -3,10 +3,10 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, - int_or_none, + determine_ext, float_or_none, + int_or_none, js_to_json, parse_iso8601, remove_end, diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index 9c0a111c0..cd35ff5fb 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -1,8 +1,8 @@ # encoding: utf-8 from .common import InfoExtractor from ..utils import ( - traverse_obj, UnsupportedError, + traverse_obj, ) diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py index b9f5e110e..dbebda4f4 100644 --- a/yt_dlp/extractor/tvanouvelles.py +++ b/yt_dlp/extractor/tvanouvelles.py @@ -1,7 +1,7 @@ import re -from .common import InfoExtractor from .brightcove import BrightcoveNewIE +from .common import InfoExtractor class TVANouvellesIE(InfoExtractor): diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index 527681315..ac480580a 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, NO_DEFAULT, + int_or_none, unescapeHTML, ) diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index a8d00e243..f1ebf027a 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -4,10 +4,10 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, dict_get, - ExtractorError, int_or_none, js_to_json, str_or_none, diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 48a6efe1c..29185d34b 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -4,8 +4,8 @@ from ..compat import compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, qualities, diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 228c2366e..d43bdc2ff 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -2,10 +2,10 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, extract_attributes, try_get, urlencode_postdata, - ExtractorError, ) diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index e8e1fc666..9249550c9 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, determine_ext, + int_or_none, mimetype2ext, ) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index fc80dade8..1a11162a0 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,10 +1,10 @@ +import functools import json import random import re from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE -from ..compat import functools # isort: split from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index 10668ac4b..d5849d29b 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -1,12 +1,12 @@ import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, int_or_none, js_to_json, ) -from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py index f914613c0..f141804c8 100644 --- a/yt_dlp/extractor/ukcolumn.py +++ b/yt_dlp/extractor/ukcolumn.py @@ -1,11 +1,11 @@ -from ..utils import ( - unescapeHTML, - urljoin, - ExtractorError, -) from .common import InfoExtractor from .vimeo import VimeoIE from .youtube import YoutubeIE +from ..utils import ( + ExtractorError, + unescapeHTML, + urljoin, +) class UkColumnIE(InfoExtractor): diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 7f97fc95f..928e6e1c2 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( - dict_get, ExtractorError, - int_or_none, ISO639Utils, + dict_get, + int_or_none, parse_age_limit, try_get, unified_timestamp, diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py index 3243f3e3b..42a28c509 100644 --- a/yt_dlp/extractor/usatoday.py +++ b/yt_dlp/extractor/usatoday.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, get_element_by_attribute, @@ -6,7 +7,6 @@ try_get, update_url_query, ) -from ..compat import compat_str class USATodayIE(InfoExtractor): diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 5df241653..046e3d768 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -7,10 +7,10 @@ compat_urlparse, ) from ..utils import ( - encode_data_uri, ExtractorError, - int_or_none, + encode_data_uri, float_or_none, + int_or_none, join_nonempty, mimetype2ext, str_or_none, diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index c3aeeb961..f6ce5b357 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..utils import ( int_or_none, - unified_strdate, unescapeHTML, + unified_strdate, ) diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index ef44d421e..205f8ea63 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, mimetype2ext, diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index 3f2dddbe9..a2e90226a 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -1,8 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError from .rutv import RUTVIE +from ..utils import ExtractorError class VestiIE(InfoExtractor): diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index aa40227a7..7715d6839 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index d31908fb1..b072d9d73 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -10,10 +10,10 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, - int_or_none, OnDemandPagedList, + clean_html, + int_or_none, parse_age_limit, str_or_none, try_get, diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 770aa284d..6322bb04b 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - clean_html, ExtractorError, + clean_html, format_field, get_element_by_class, int_or_none, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index 44353b7fc..e1219a8a0 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( - format_field, float_or_none, + format_field, get_element_by_id, int_or_none, str_to_int, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 91b976403..ac96ade18 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,21 +1,21 @@ import base64 import functools -import re import itertools +import re from .common import InfoExtractor from ..compat import compat_str, compat_urlparse from ..networking import HEADRequest, Request from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + OnDemandPagedList, clean_html, determine_ext, - ExtractorError, get_element_by_class, - js_to_json, int_or_none, + js_to_json, merge_dicts, - OnDemandPagedList, parse_filesize, parse_iso8601, parse_qs, @@ -26,8 +26,8 @@ unified_timestamp, unsmuggle_url, urlencode_postdata, - urljoin, urlhandle_detect_ext, + urljoin, ) diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 6f9af9f64..480f49b7b 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,8 +1,8 @@ -import re import json -import uuid import random +import re import urllib.parse +import uuid from .common import InfoExtractor from ..compat import compat_str @@ -10,10 +10,10 @@ ExtractorError, int_or_none, remove_end, + smuggle_url, strip_or_none, traverse_obj, try_get, - smuggle_url, unified_timestamp, unsmuggle_url, url_or_none, diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 28d502685..9a3c75b62 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -20,6 +20,7 @@ parse_resolution, str_or_none, str_to_int, + traverse_obj, try_call, unescapeHTML, unified_timestamp, @@ -27,7 +28,6 @@ url_or_none, urlencode_postdata, urljoin, - traverse_obj, ) @@ -467,13 +467,13 @@ def _real_extract(self, url): 'source_preference': 1, 'height': height, }) - elif format_id == 'hls': + elif format_id.startswith('hls') and format_id != 'hls_live_playback': fmts, subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False, live=is_live) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - elif format_id.startswith('dash_'): + elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'): fmts, subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=format_id, fatal=False) formats.extend(fmts) diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py deleted file mode 100644 index ef77bedd2..000000000 --- a/yt_dlp/extractor/voot.py +++ /dev/null @@ -1,212 +0,0 @@ -import json -import time -import uuid - -from .common import InfoExtractor -from ..compat import compat_str -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - jwt_decode_hs256, - parse_age_limit, - traverse_obj, - try_call, - try_get, - unified_strdate, -) - - -class VootBaseIE(InfoExtractor): - _NETRC_MACHINE = 'voot' - _GEO_BYPASS = False - _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.' - _TOKEN = None - _EXPIRY = 0 - _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'} - - def _perform_login(self, username, password): - if self._TOKEN and self._EXPIRY: - return - - if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)): - VootBaseIE._TOKEN = password - VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp'] - self.report_login() - - # Mobile number as username is not supported - elif not username.isdigit(): - check_username = self._download_json( - 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({ - 'type': 'email', - 'email': username - }, separators=(',', ':')).encode(), headers={ - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - }, note='Checking username', expected_status=403) - if not traverse_obj(check_username, ('isExist', {bool})): - if traverse_obj(check_username, ('status', 'code', {int})) == 9999: - self.raise_geo_restricted(countries=['IN']) - raise ExtractorError('Incorrect username', expected=True) - auth_token = traverse_obj(self._download_json( - 'https://userauth.voot.com/usersV3/v3/login', None, data=json.dumps({ - 'type': 'traditional', - 'deviceId': str(uuid.uuid4()), - 'deviceBrand': 'PC/MAC', - 'data': { - 'email': username, - 'password': password - } - }, separators=(',', ':')).encode(), headers={ - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - }, note='Logging in', expected_status=400), ('data', 'authToken', {dict})) - if not auth_token: - raise ExtractorError('Incorrect password', expected=True) - VootBaseIE._TOKEN = auth_token['accessToken'] - VootBaseIE._EXPIRY = auth_token['expirationTime'] - - else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - def _check_token_expiry(self): - if int(time.time()) >= self._EXPIRY: - raise ExtractorError('Access token has expired', expected=True) - - def _real_initialize(self): - if not self._TOKEN: - self.raise_login_required(self._LOGIN_HINT, method=None) - self._check_token_expiry() - - -class VootIE(VootBaseIE): - _WORKING = False - _VALID_URL = r'''(?x) - (?: - voot:| - https?://(?:www\.)?voot\.com/? - (?: - movies?/[^/]+/| - (?:shows|kids)/(?:[^/]+/){4} - ) - ) - (?P<id>\d{3,}) - ''' - _TESTS = [{ - 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', - 'info_dict': { - 'id': '441353', - 'ext': 'mp4', - 'title': 'Is this the end of Kamini?', - 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', - 'timestamp': 1472103000, - 'upload_date': '20160825', - 'series': 'Ishq Ka Rang Safed', - 'season_number': 1, - 'episode': 'Is this the end of Kamini?', - 'episode_number': 340, - 'release_date': '20160825', - 'season': 'Season 1', - 'age_limit': 13, - 'duration': 1146.0, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925', - 'only_matching': True, - }, { - 'url': 'https://www.voot.com/movies/pandavas-5/424627', - 'only_matching': True, - }, { - 'url': 'https://www.voot.com/movie/fight-club/621842', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - media_info = self._download_json( - 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id, - query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN}) - - try: - m3u8_url = self._download_json( - 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id, - 'Downloading playback JSON', data=b'{}', headers={ - **self.geo_verification_headers(), - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - 'platform': 'androidwebdesktop', - 'vootid': video_id, - 'voottoken': self._TOKEN, - })['m3u8'] - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 400: - self._check_token_expiry() - raise - - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') - self._remove_duplicate_formats(formats) - - return { - 'id': video_id, - # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p - 'formats': traverse_obj(formats, ( - lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), - 'http_headers': self._API_HEADERS, - **traverse_obj(media_info, ('result', 0, { - 'title': ('fullTitle', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('showName', {str}), - 'season_number': ('season', {int_or_none}), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', {int_or_none}), - 'timestamp': ('uploadTime', {int_or_none}), - 'release_date': ('telecastDate', {unified_strdate}), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('duration', {float_or_none}), - })), - } - - -class VootSeriesIE(VootBaseIE): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})' - _TESTS = [{ - 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002', - 'playlist_mincount': 442, - 'info_dict': { - 'id': '100002', - }, - }, { - 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003', - 'playlist_mincount': 341, - 'info_dict': { - 'id': '100003', - }, - }] - _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common' - _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}' - - def _entries(self, show_id): - show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id) - for season in show_json.get('result', []): - page_num = 1 - season_id = try_get(season, lambda x: x['id'], compat_str) - season_json = self._download_json(self._SEASON_API.format(season_id, page_num), - video_id=season_id, - note='Downloading JSON metadata page %d' % page_num) - episodes_json = season_json.get('result', []) - while episodes_json: - page_num += 1 - for episode in episodes_json: - video_id = episode.get('id') - yield self.url_result( - 'voot:%s' % video_id, ie=VootIE.ie_key(), video_id=video_id) - episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num), - video_id=season_id, - note='Downloading JSON metadata page %d' % page_num)['result'] - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id) diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index a1a9c1708..3ac0f8387 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, int_or_none, + xpath_text, ) diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 74501b1d2..1cfed2da5 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import traverse_obj diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index f80f140ed..0b7ddd239 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -6,16 +6,16 @@ compat_urlparse, ) from ..utils import ( + ExtractorError, determine_ext, dict_get, - ExtractorError, js_to_json, strip_jsonp, try_get, unified_strdate, update_url_query, - urlhandle_detect_ext, url_or_none, + urlhandle_detect_ext, ) diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index 2fca745aa..b6a659385 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,6 +1,6 @@ +import itertools import json import random -import itertools import urllib.parse from .common import InfoExtractor diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index f2808cd9f..492891d78 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,12 +1,12 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, qualities, try_call, try_get, - ExtractorError, ) -from ..compat import compat_str class WhoWatchIE(InfoExtractor): diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index f9bf092df..d7d77c0db 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, parse_duration, urlencode_postdata, - ExtractorError, ) diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 5e590e2f4..0ef4e8e53 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -1,13 +1,13 @@ -from .common import InfoExtractor -from ..utils import ( - try_get, - ExtractorError, -) - import json import random import re +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + try_get, +) + class WPPilotBaseIE(InfoExtractor): _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s' diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 86e264679..35fe30362 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 01ac5ddb6..0b3a620ec 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, determine_ext, dict_get, extract_attributes, - ExtractorError, float_or_none, int_or_none, parse_duration, diff --git a/yt_dlp/extractor/xiaohongshu.py b/yt_dlp/extractor/xiaohongshu.py new file mode 100644 index 000000000..faad9d923 --- /dev/null +++ b/yt_dlp/extractor/xiaohongshu.py @@ -0,0 +1,83 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + js_to_json, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class XiaoHongShuIE(InfoExtractor): + _VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)' + IE_DESC = '小红书' + _TESTS = [{ + 'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9', + 'md5': '2a87a77ddbedcaeeda8d7eae61b61228', + 'info_dict': { + 'id': '6411cf99000000001300b6d9', + 'ext': 'mp4', + 'uploader_id': '5c31698d0000000007018a31', + 'description': '#今日快乐今日发[话题]# #吃货薯看这里[话题]# #香妃蛋糕[话题]# #小五卷蛋糕[话题]# #新手蛋糕卷[话题]#', + 'title': '香妃蛋糕也太香了吧🔥不需要卷❗️绝对的友好', + 'tags': ['今日快乐今日发', '吃货薯看这里', '香妃蛋糕', '小五卷蛋糕', '新手蛋糕卷'], + 'duration': 101.726, + 'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + initial_state = self._search_json( + r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json) + + note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note')) + video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...)) + + formats = [] + for info in video_info: + format_info = traverse_obj(info, { + 'fps': ('fps', {int_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'vcodec': ('videoCodec', {str}), + 'acodec': ('audioCodec', {str}), + 'abr': ('audioBitrate', {int_or_none}), + 'vbr': ('videoBitrate', {int_or_none}), + 'audio_channels': ('audioChannels', {int_or_none}), + 'tbr': ('avgBitrate', {int_or_none}), + 'format': ('qualityType', {str}), + 'filesize': ('size', {int_or_none}), + 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}) + }) + + formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), { + lambda u: url_or_none(u) and {'url': u, **format_info}}))) + + thumbnails = [] + for image_info in traverse_obj(note_info, ('imageList', ...)): + thumbnail_info = traverse_obj(image_info, { + 'height': ('height', {int_or_none}), + 'width': ('width', {int_or_none}), + }) + for thumb_url in traverse_obj(image_info, (('urlDefault', 'urlPre'), {url_or_none})): + thumbnails.append({ + 'url': thumb_url, + **thumbnail_info, + }) + + return { + 'id': display_id, + 'formats': formats, + 'thumbnails': thumbnails, + 'title': self._html_search_meta(['og:title'], webpage, default=None), + **traverse_obj(note_info, { + 'title': ('title', {str}), + 'description': ('desc', {str}), + 'tags': ('tagList', ..., 'name', {str}), + 'uploader_id': ('user', 'userId', {str}), + }), + } diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 1452aaec3..74d4f0419 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -2,9 +2,9 @@ from .common import InfoExtractor from ..utils import ( + NO_DEFAULT, determine_ext, int_or_none, - NO_DEFAULT, str_to_int, ) diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 8dd1cd9ef..322e86570 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( + find_xpath_attr, int_or_none, parse_iso8601, - xpath_with_ns, xpath_text, - find_xpath_attr, + xpath_with_ns, ) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index a489033ab..6b16ac291 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -3,9 +3,9 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, int_or_none, parse_duration, ) diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index e3e3a9fe6..aa6c84d09 100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, int_or_none, + parse_duration, ) diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index 794dc3eae..acfe69bf4 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -5,8 +5,8 @@ from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, try_get, ) diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 6d4e31bf3..0e047aa16 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -1,19 +1,27 @@ +import itertools import re from .common import InfoExtractor from ..utils import ( + ExtractorError, + clean_html, extract_attributes, + get_element_by_class, + get_element_by_id, + get_elements_html_by_class, int_or_none, merge_dicts, - str_to_int, + parse_count, + parse_qs, traverse_obj, unified_strdate, url_or_none, + urljoin, ) class YouPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?/?(?:[#?]|$)' _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)'] _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', @@ -34,7 +42,7 @@ class YouPornIE(InfoExtractor): 'tags': list, 'age_limit': 18, }, - 'skip': 'This video has been disabled', + 'skip': 'This video has been deactivated', }, { # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', @@ -72,7 +80,6 @@ class YouPornIE(InfoExtractor): 'id': '16290308', 'age_limit': 18, 'categories': [], - 'description': str, # TODO: detect/remove SEO spam description in ytdl backport 'display_id': 'tinderspecial-trailer1', 'duration': 298.0, 'ext': 'mp4', @@ -90,7 +97,17 @@ def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') self._set_cookie('.youporn.com', 'age_verified', '1') webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id) - definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions'] + + watchable = self._search_regex( + r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''', + webpage, 'watchability', default=None) + if not watchable: + msg = re.split(r'\s{2}', clean_html(get_element_by_id('mainContent', webpage)) or '')[0] + raise ExtractorError( + f'{self.IE_NAME} says: {msg}' if msg else 'Video unavailable', expected=True) + + player_vars = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id) + definitions = player_vars['mediaDefinitions'] def get_format_data(data, stream_type): info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any)) @@ -143,8 +160,10 @@ def get_format_data(data, stream_type): thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'duration', fatal=False)) + duration = traverse_obj(player_vars, ('duration', {int_or_none})) + if duration is None: + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration', fatal=False)) uploader = self._html_search_regex( r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', @@ -160,11 +179,11 @@ def get_format_data(data, stream_type): view_count = None views = self._search_regex( - r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage, - 'views', default=None) + r'(<div [^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>', + webpage, 'views', default=None) if views: - view_count = str_to_int(extract_attributes(views).get('data-value')) - comment_count = str_to_int(self._search_regex( + view_count = parse_count(extract_attributes(views).get('data-value')) + comment_count = parse_count(self._search_regex( r'>All [Cc]omments? \(([\d,.]+)\)', webpage, 'comment count', default=None)) @@ -182,7 +201,8 @@ def extract_tag_box(regex, title): data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) data.pop('url', None) - return merge_dicts(data, { + + result = merge_dicts(data, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -198,3 +218,350 @@ def extract_tag_box(regex, title): 'age_limit': age_limit, 'formats': formats, }) + + # Remove SEO spam "description" + description = result.get('description') + if description and description.startswith(f'Watch {result.get("title")} online'): + del result['description'] + + return result + + +class YouPornListBase(InfoExtractor): + def _get_next_url(self, url, pl_id, html): + return urljoin(url, self._search_regex( + r'''<a [^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''', + get_element_by_id('next', html) or '', 'next page', + group='url', default=None)) + + @classmethod + def _get_title_from_slug(cls, title_slug): + return re.sub(r'[_-]', ' ', title_slug) + + def _entries(self, url, pl_id, html=None, page_num=None): + start = page_num or 1 + for page in itertools.count(start): + if not html: + html = self._download_webpage( + url, pl_id, note=f'Downloading page {page}', fatal=page == start) + if not html: + return + for element in get_elements_html_by_class('video-title', html): + if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})): + yield self.url_result(video_url) + + if page_num is not None: + return + next_url = self._get_next_url(url, pl_id, html) + if not next_url or next_url == url: + return + url = next_url + html = None + + def _real_extract(self, url, html=None): + m_dict = self._match_valid_url(url).groupdict() + pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort')) + qs = {k: v[-1] for k, v in parse_qs(url).items() if v} + + base_id = pl_id or 'YouPorn' + title = self._get_title_from_slug(base_id) + if page_type: + title = f'{page_type.capitalize()} {title}' + base_id = [base_id.lower()] + if sort is None: + title += ' videos' + else: + title = f'{title} videos by {re.sub(r"[_-]", " ", sort)}' + base_id.append(sort) + if qs: + filters = list(map('='.join, sorted(qs.items()))) + title += f' ({",".join(filters)})' + base_id.extend(filters) + pl_id = '/'.join(base_id) + + return self.playlist_result( + self._entries(url, pl_id, html=html, page_num=int_or_none(qs.get('page'))), + playlist_id=pl_id, playlist_title=title) + + +class YouPornCategoryIE(YouPornListBase): + IE_DESC = 'YouPorn category, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>category)/(?P<id>[^/?#&]+) + (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/category/popular-with-women/popular/', + 'info_dict': { + 'id': 'popular-with-women/popular', + 'title': 'Category popular with women videos by popular', + }, + 'playlist_mincount': 39, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/category/popular-with-women/duration/?min_minutes=10', + 'info_dict': { + 'id': 'popular-with-women/duration/min_minutes=10', + 'title': 'Category popular with women videos by duration (min_minutes=10)', + }, + 'playlist_mincount': 2, + # 'playlist_maxcount': 30, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/category/popular-with-women/popular?page=1', + 'info_dict': { + 'id': 'popular-with-women/popular/page=1', + 'title': 'Category popular with women videos by popular (page=1)', + }, + 'playlist_count': 36, + }] + + +class YouPornChannelIE(YouPornListBase): + IE_DESC = 'YouPorn channel, with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>channel)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/channel/x-feeds/', + 'info_dict': { + 'id': 'x-feeds', + 'title': 'Channel X-Feeds videos', + }, + 'playlist_mincount': 37, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1', + 'info_dict': { + 'id': 'x-feeds/duration/page=1', + 'title': 'Channel X-Feeds videos by duration (page=1)', + }, + 'playlist_count': 24, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + +class YouPornCollectionIE(YouPornListBase): + IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>collection)s/videos/(?P<id>\d+) + (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/collections/videos/33044251/', + 'info_dict': { + 'id': '33044251', + 'title': 'Collection Sexy Lips videos', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_mincount': 50, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1', + 'info_dict': { + 'id': '33044251/time/page=1', + 'title': 'Collection Sexy Lips videos by time (page=1)', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super()._real_extract(url, html=html) + infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class( + 'collection-infos', html)) or '') + title, uploader = self._search_regex( + r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)', + infos, 'title/uploader', group=('title', 'uploader'), default=(None, None)) + if title: + playlist.update({ + 'title': playlist['title'].replace(playlist['id'].split('/')[0], title), + 'uploader': uploader, + }) + + return playlist + + +class YouPornTagIE(YouPornListBase): + IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + porn(?P<type>tag)s/(?P<id>[^/?#&]+) + (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/porntags/austrian', + 'info_dict': { + 'id': 'austrian', + 'title': 'Tag austrian videos', + }, + 'playlist_mincount': 33, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10', + 'info_dict': { + 'id': 'austrian/duration/min_minutes=10', + 'title': 'Tag austrian videos by duration (min_minutes=10)', + }, + 'playlist_mincount': 10, + # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3, + # or more, varying with number of ads; let's set max as 9x4 + # NB col 1 may not be shown in non-JS page with site CSS and zoom 100% + # 'playlist_maxcount': 32, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/porntags/austrian/?page=1', + 'info_dict': { + 'id': 'austrian/page=1', + 'title': 'Tag austrian videos (page=1)', + }, + 'playlist_mincount': 32, + # 'playlist_maxcount': 34, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }] + + def _real_extract(self, url): + self.report_warning( + 'YouPorn tag pages are not correctly cached and ' + 'often return incorrect results', only_once=True) + return super()._real_extract(url) + + +class YouPornStarIE(YouPornListBase): + IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>pornstar)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/pornstar/daynia/', + 'info_dict': { + 'id': 'daynia', + 'title': 'Pornstar Daynia videos', + 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+', + }, + 'playlist_mincount': 40, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/pornstar/daynia/?page=1', + 'info_dict': { + 'id': 'daynia/page=1', + 'title': 'Pornstar Daynia videos (page=1)', + 'description': 're:.{180,}', + }, + 'playlist_count': 26, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super()._real_extract(url, html=html) + INFO_ELEMENT_RE = r'''(?x) + <div [^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*> + (?P<info>[\s\S]+?)(?:</div>\s*){6,} + ''' + + if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default=''): + infos = re.sub( + r'(?:\s*nl=nl)+\s*', ' ', + re.sub(r'(?u)\s+', ' ', clean_html(re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '') + + return { + **playlist, + 'description': infos.strip() or None, + } + + +class YouPornVideosIE(YouPornListBase): + IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?:(?P<id>browse)/)? + (?P<sort>(?(id) + (?:duration|rating|time|views)| + (?:most_(?:favou?rit|view)ed|recommended|top_rated)?)) + (?:[/#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/', + 'info_dict': { + 'id': 'youporn', + 'title': 'YouPorn videos', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/recommended', + 'info_dict': { + 'id': 'youporn/recommended', + 'title': 'YouPorn videos by recommended', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/top_rated', + 'info_dict': { + 'id': 'youporn/top_rated', + 'title': 'YouPorn videos by top rated', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/browse/time', + 'info_dict': { + 'id': 'browse/time', + 'title': 'YouPorn videos by time', + }, + 'only_matching': True, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=2/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)', + }, + 'playlist_mincount': 10, + # 'playlist_maxcount': 28, + }, { + 'note': 'Filtered paginated list with several pages', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=5/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)', + }, + 'playlist_mincount': 45, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/browse/time?page=1', + 'info_dict': { + 'id': 'browse/time/page=1', + 'title': 'YouPorn videos by time (page=1)', + }, + 'playlist_count': 36, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return 'YouPorn' if title_slug == 'browse' else title_slug diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index ca2d8b165..3f8c1e109 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1325,6 +1325,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, } }, { @@ -1368,6 +1369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, }, 'params': { 'skip_download': True, @@ -1454,6 +1456,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1401991663, }, }, { @@ -1513,6 +1516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', 'uploader_id': '@ProjektMelody', + 'timestamp': 1577508724, }, }, { @@ -1618,6 +1622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@Olympics', 'uploader_id': '@Olympics', 'channel_is_verified': True, + 'timestamp': 1440707674, }, 'params': { 'skip_download': 'requires avconv', @@ -1651,6 +1656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫ᄋᄅ', 'uploader_url': 'https://www.youtube.com/@AllenMeow', 'uploader_id': '@AllenMeow', + 'timestamp': 1299776999, }, }, # url_encoded_fmt_stream_map is empty string @@ -1794,6 +1800,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }], 'params': {'skip_download': True}, + 'skip': 'Not multifeed anymore', }, { # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) @@ -1902,6 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Berkman Klein Center for Internet & Society', 'uploader_id': '@BKCHarvard', 'uploader_url': 'https://www.youtube.com/@BKCHarvard', + 'timestamp': 1422422076, }, 'params': { 'skip_download': True, @@ -1937,6 +1945,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@BernieSanders', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1447987198, }, 'params': { 'skip_download': True, @@ -2000,6 +2009,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Vsauce', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1484761047, }, 'params': { 'skip_download': True, @@ -2155,6 +2165,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'l\'Or Vert asbl', 'uploader_url': 'https://www.youtube.com/@ElevageOrVert', 'uploader_id': '@ElevageOrVert', + 'timestamp': 1497343210, }, 'params': { 'skip_download': True, @@ -2193,6 +2204,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Csharp-video-tutorialsBlogspot', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1377976349, }, 'params': { 'skip_download': True, @@ -2275,6 +2287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@CBSMornings', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1405513526, } }, { @@ -2292,7 +2305,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'channel': 'Walk around Japan', 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'], - 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp', + 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg', 'age_limit': 0, 'availability': 'public', 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', @@ -2302,6 +2315,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Walk around Japan', 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124', 'uploader_id': '@walkaroundjapan7124', + 'timestamp': 1605884416, }, 'params': { 'skip_download': True, @@ -2397,6 +2411,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1395685455, }, 'params': {'format': 'mhtml', 'skip_download': True} }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) @@ -2426,37 +2441,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@LeonNguyen', 'uploader_id': '@LeonNguyen', 'heatmap': 'count:100', + 'timestamp': 1641170939, } - }, { - # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date - 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', - 'info_dict': { - 'id': '2NUZ8W2llS4', - 'ext': 'mp4', - 'title': 'The NP that test your phone performance 🙂', - 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', - 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', - 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', - 'duration': 21, - 'view_count': int, - 'age_limit': 0, - 'categories': ['Gaming'], - 'tags': 'count:23', - 'playable_in_embed': True, - 'live_status': 'not_live', - 'upload_date': '20220102', - 'like_count': int, - 'availability': 'public', - 'channel': 'Leon Nguyen', - 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', - 'comment_count': int, - 'channel_follower_count': int, - 'uploader': 'Leon Nguyen', - 'uploader_url': 'https://www.youtube.com/@LeonNguyen', - 'uploader_id': '@LeonNguyen', - 'heatmap': 'count:100', - }, - 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']} }, { # date text is premiered video, ensure upload date in UTC (published 1641172509) 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', @@ -2488,38 +2474,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1641172509, } }, - { # continuous livestream. Microformat upload date should be preferred. - # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 - 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + { # continuous livestream. + # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00 + 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk', 'info_dict': { - 'id': 'kgx4WGK0oNU', - 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'id': 'jfKfPfyJRdk', 'ext': 'mp4', - 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', - 'availability': 'public', - 'age_limit': 0, - 'release_timestamp': 1637975704, - 'upload_date': '20210619', - 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', - 'live_status': 'is_live', - 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', - 'channel': 'Abao in Tokyo', - 'channel_follower_count': int, - 'release_date': '20211127', - 'tags': 'count:39', - 'categories': ['People & Blogs'], + 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow', 'like_count': int, - 'view_count': int, - 'playable_in_embed': True, - 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + 'uploader': 'Lofi Girl', + 'categories': ['Music'], 'concurrent_view_count': int, - 'uploader': 'Abao in Tokyo', - 'uploader_url': 'https://www.youtube.com/@abaointokyo', - 'uploader_id': '@abaointokyo', + 'playable_in_embed': True, + 'timestamp': 1657627949, + 'release_date': '20220712', + 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow', + 'description': 'md5:13a6f76df898f5674f9127139f3df6f7', + 'age_limit': 0, + 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg', + 'release_timestamp': 1657641570, + 'uploader_url': 'https://www.youtube.com/@LofiGirl', + 'channel_follower_count': int, + 'channel_is_verified': True, + 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to', + 'view_count': int, + 'live_status': 'is_live', + 'tags': 'count:32', + 'channel': 'Lofi Girl', + 'availability': 'public', + 'upload_date': '20220712', + 'uploader_id': '@LofiGirl', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', 'info_dict': { @@ -2545,6 +2534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@lesmiscore', 'uploader': 'Lesmiscore', 'uploader_url': 'https://www.youtube.com/@lesmiscore', + 'timestamp': 1648005313, } }, { # Prefer primary title+description language metadata by default @@ -2572,6 +2562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1662677394, }, 'params': {'skip_download': True} }, { @@ -2585,7 +2576,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 5, 'live_status': 'not_live', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', - 'upload_date': '20220728', + 'upload_date': '20220729', 'view_count': int, 'categories': ['People & Blogs'], 'thumbnail': r're:^https?://.*\.jpg', @@ -2598,6 +2589,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1659073275, + 'like_count': int, }, 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}}, 'expected_warnings': [r'Preferring "fr" translated fields'], @@ -2663,6 +2656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_id': '@ProjektMelody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', + 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, }, @@ -2697,6 +2691,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@sana_natori', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1671798112, }, }, { @@ -2766,6 +2761,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries', 'uploader_id': '@ChristopherSykesDocumentaries', 'heatmap': 'count:100', + 'timestamp': 1211825920, }, 'params': { 'skip_download': True, @@ -4654,19 +4650,31 @@ def process_language(container, base_url, lang_code, sub_name, query): 'uploader_id': channel_handle, 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None), }) + + # We only want timestamp IF it has time precision AND a timezone + # Currently the uploadDate in microformats appears to be in US/Pacific timezone. + timestamp = ( + parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT) + or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT) + ) + upload_date = ( + dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else + ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate')) + )) + + # In the case we cannot get the timestamp: # The upload date for scheduled, live and past live streams / premieres in microformats # may be different from the stream date. Although not in UTC, we will prefer it in this case. # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 - upload_date = ( - unified_strdate(get_first(microformats, 'uploadDate')) - or unified_strdate(search_meta('uploadDate'))) - if not upload_date or ( - live_status in ('not_live', None) - and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) - ): + if not upload_date or (not timestamp and live_status in ('not_live', None)): + # this should be in UTC, as configured in the cookie/client context upload_date = strftime_or_none( self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date + info['upload_date'] = upload_date + info['timestamp'] = timestamp if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 88f526bbc..2a12aa509 100644 --- a/yt_dlp/extractor/zapiks.py +++ b/yt_dlp/extractor/zapiks.py @@ -2,11 +2,11 @@ from .common import InfoExtractor from ..utils import ( + int_or_none, parse_duration, parse_iso8601, - xpath_with_ns, xpath_text, - int_or_none, + xpath_with_ns, ) diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index c24b33874..18b22a5c7 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import format_field, float_or_none, int_or_none +from ..utils import float_or_none, format_field, int_or_none class ZhihuIE(InfoExtractor): diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index ff5eac89a..909a7a3ae 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -10,8 +10,8 @@ int_or_none, join_nonempty, try_call, + url_or_none, urljoin, - url_or_none ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index 2f3b4c47f..8d3156d64 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -3,8 +3,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - dict_get, ExtractorError, + dict_get, int_or_none, js_to_json, parse_iso8601, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 3aecae170..f713e9b9d 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -485,7 +485,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): }, 'aliases': { 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], - '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], + '2021': ['2022', 'no-certifi', 'filename-sanitization'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': [], } @@ -527,7 +527,8 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs): metavar='CLIENT[:OS]', dest='impersonate', default=None, help=( 'Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. ' - 'Pass --impersonate="" to impersonate any client.'), + 'Pass --impersonate="" to impersonate any client. Note that forcing impersonation ' + 'for all requests may have a detrimental impact on download speed and stability'), ) network.add_option( '--list-impersonate-targets', diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 01d54b846..29dd9e79c 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1134,7 +1134,7 @@ def is_path_like(f): return isinstance(f, (str, bytes, os.PathLike)) -def extract_timezone(date_str): +def extract_timezone(date_str, default=None): m = re.search( r'''(?x) ^.{8,}? # >=8 char non-TZ prefix, if present @@ -1146,21 +1146,25 @@ def extract_timezone(date_str): (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm $) ''', date_str) + timezone = None + if not m: m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str) timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) if timezone is not None: date_str = date_str[:-len(m.group('tz'))] - timezone = dt.timedelta(hours=timezone or 0) + timezone = dt.timedelta(hours=timezone) else: date_str = date_str[:-len(m.group('tz'))] - if not m.group('sign'): - timezone = dt.timedelta() - else: + if m.group('sign'): sign = 1 if m.group('sign') == '+' else -1 timezone = dt.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) + + if timezone is None and default is not NO_DEFAULT: + timezone = default or dt.timedelta() + return timezone, date_str @@ -1172,10 +1176,9 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): date_str = re.sub(r'\.[0-9]+', '', date_str) - if timezone is None: - timezone, date_str = extract_timezone(date_str) + timezone, date_str = extract_timezone(date_str, timezone) - with contextlib.suppress(ValueError): + with contextlib.suppress(ValueError, TypeError): date_format = f'%Y-%m-%d{delimiter}%H:%M:%S' dt_ = dt.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt_.timetuple()) @@ -2527,7 +2530,7 @@ def fixup(url): return False # "#" cannot be stripped out since it is part of the URI # However, it can be safely stripped out if following a whitespace - return re.split(r'\s#', url, 1)[0].rstrip() + return re.split(r'\s#', url, maxsplit=1)[0].rstrip() with contextlib.closing(batch_fd) as fd: return [url for url in map(fixup, fd) if url] diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 22c2c048d..a90b288c9 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.04.09' +__version__ = '2024.05.27' -RELEASE_GIT_HEAD = 'ff07792676f404ffff6ee61b5638c9dc1a33a37a' +RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.04.09' +_pkg_version = '2024.05.27'