diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 036ce43489..cd7ead7966 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,10 +107,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.10" - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge use-mamba: true @@ -121,16 +121,14 @@ jobs: - name: Install Requirements run: | sudo apt -y install zip pandoc man sed - reqs=$(mktemp) - cat > "$reqs" << EOF + cat > ./requirements.txt << EOF python=3.10.* - pyinstaller - cffi brotli-python - secretstorage EOF - sed -E '/^(brotli|secretstorage).*/d' requirements.txt >> "$reqs" - mamba create -n build --file "$reqs" + python devscripts/install_deps.py --print \ + --exclude brotli --exclude brotlicffi \ + --include secretstorage --include pyinstaller >> ./requirements.txt + mamba create -n build --file ./requirements.txt - name: Prepare run: | @@ -144,9 +142,9 @@ jobs: run: | unset LD_LIBRARY_PATH # Harmful; set by setup-python conda activate build - python pyinst.py --onedir + python -m bundle.pyinstaller --onedir (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) - python pyinst.py + python -m bundle.pyinstaller mv ./dist/yt-dlp_linux ./yt-dlp_linux mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip @@ -164,13 +162,15 @@ jobs: done - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-${{ github.job }} path: | yt-dlp yt-dlp.tar.gz yt-dlp_linux yt-dlp_linux.zip + compression-level: 0 linux_arm: needs: process @@ -201,17 +201,18 @@ jobs: dockerRunArgs: --volume "${PWD}/repo:/repo" install: | # Installing Python 3.10 from the Deadsnakes repo raises errors apt update - apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip + apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip python3.8 -m pip install -U pip setuptools wheel - # Cannot access requirements.txt from the repo directory at this stage - python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage + # Cannot access any files from the repo directory at this stage + python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi run: | cd repo - python3.8 -m pip install -U Pyinstaller secretstorage -r requirements.txt # Cached version may be out of date + python3.8 devscripts/install_deps.py -o --include build + python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" python3.8 devscripts/make_lazy_extractors.py - python3.8 pyinst.py + python3.8 -m bundle.pyinstaller if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" @@ -224,10 +225,12 @@ jobs: fi - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-linux_${{ matrix.architecture }} path: | # run-on-arch-action designates armv7l as armv7 repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} + compression-level: 0 macos: needs: process @@ -240,9 +243,10 @@ jobs: - name: Install Requirements run: | brew install coreutils - python3 -m pip install -U --user pip setuptools wheel + python3 devscripts/install_deps.py --user -o --include build + python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt # We need to ignore wheels otherwise we break universal2 builds - python3 -m pip install -U --user --no-binary :all: Pyinstaller -r requirements.txt + python3 -m pip install -U --user --no-binary :all: -r requirements.txt - name: Prepare run: | @@ -250,9 +254,9 @@ jobs: python3 devscripts/make_lazy_extractors.py - name: Build run: | - python3 pyinst.py --target-architecture universal2 --onedir + python3 -m bundle.pyinstaller --target-architecture universal2 --onedir (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) - python3 pyinst.py --target-architecture universal2 + python3 -m bundle.pyinstaller --target-architecture universal2 - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION @@ -265,11 +269,13 @@ jobs: [[ "$version" != "$downgraded_version" ]] - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-${{ github.job }} path: | dist/yt-dlp_macos dist/yt-dlp_macos.zip + compression-level: 0 macos_legacy: needs: process @@ -293,8 +299,8 @@ jobs: - name: Install Requirements run: | brew install coreutils - python3 -m pip install -U --user pip setuptools wheel - python3 -m pip install -U --user Pyinstaller -r requirements.txt + python3 devscripts/install_deps.py --user -o --include build + python3 devscripts/install_deps.py --user --include pyinstaller - name: Prepare run: | @@ -302,7 +308,7 @@ jobs: python3 devscripts/make_lazy_extractors.py - name: Build run: | - python3 pyinst.py + python3 -m bundle.pyinstaller mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy - name: Verify --update-to @@ -316,10 +322,12 @@ jobs: [[ "$version" != "$downgraded_version" ]] - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-${{ github.job }} path: | dist/yt-dlp_macos_legacy + compression-level: 0 windows: needs: process @@ -328,13 +336,14 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: # 3.8 is used for Win7 support python-version: "3.8" - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - python -m pip install -U pip setuptools wheel py2exe - pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt + python devscripts/install_deps.py -o --include build + python devscripts/install_deps.py --include py2exe + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" - name: Prepare run: | @@ -342,10 +351,10 @@ jobs: python devscripts/make_lazy_extractors.py - name: Build run: | - python setup.py py2exe + python -m bundle.py2exe Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - python pyinst.py - python pyinst.py --onedir + python -m bundle.pyinstaller + python -m bundle.pyinstaller --onedir Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - name: Verify --update-to @@ -362,12 +371,14 @@ jobs: } - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-${{ github.job }} path: | dist/yt-dlp.exe dist/yt-dlp_min.exe dist/yt-dlp_win.zip + compression-level: 0 windows32: needs: process @@ -376,14 +387,15 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.8" architecture: "x86" - name: Install Requirements run: | - python -m pip install -U pip setuptools wheel - pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt + python devscripts/install_deps.py -o --include build + python devscripts/install_deps.py + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" - name: Prepare run: | @@ -391,7 +403,7 @@ jobs: python devscripts/make_lazy_extractors.py - name: Build run: | - python pyinst.py + python -m bundle.pyinstaller - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION @@ -407,10 +419,12 @@ jobs: } - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-${{ github.job }} path: | dist/yt-dlp_x86.exe + compression-level: 0 meta_files: if: inputs.meta_files && always() && !cancelled() @@ -424,7 +438,11 @@ jobs: - windows32 runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v3 + - uses: actions/download-artifact@v4 + with: + path: artifact + pattern: build-* + merge-multiple: true - name: Make SHA2-SUMS files run: | @@ -459,8 +477,10 @@ jobs: done - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: + name: build-${{ github.job }} path: | - SHA*SUMS* _update_spec + SHA*SUMS* + compression-level: 0 diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index eaaf03dee4..ba8630630c 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -49,11 +49,11 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install test requirements - run: pip install pytest -r requirements.txt + run: python3 ./devscripts/install_deps.py --include dev - name: Run tests continue-on-error: False run: | diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 9f47d67187..7256804d93 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -11,11 +11,11 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install test requirements - run: pip install pytest -r requirements.txt + run: python3 ./devscripts/install_deps.py --include dev - name: Run tests continue-on-error: true run: python3 ./devscripts/run_tests.py download @@ -38,11 +38,11 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install test requirements - run: pip install pytest -r requirements.txt + run: python3 ./devscripts/install_deps.py --include dev - name: Run tests continue-on-error: true run: python3 ./devscripts/run_tests.py download diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 84fca62d4d..3114e7bdd6 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -11,11 +11,11 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python 3.8 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: Install test requirements - run: pip install pytest -r requirements.txt + run: python3 ./devscripts/install_deps.py --include dev - name: Run tests run: | python3 -m yt_dlp -v || true @@ -26,10 +26,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 - name: Install flake8 - run: pip install flake8 + run: python3 ./devscripts/install_deps.py -o --include dev - name: Make lazy extractors - run: python devscripts/make_lazy_extractors.py + run: python3 ./devscripts/make_lazy_extractors.py - name: Run flake8 run: flake8 . diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml index 0664137a94..a84547580b 100644 --- a/.github/workflows/release-master.yml +++ b/.github/workflows/release-master.yml @@ -6,8 +6,10 @@ on: paths: - "yt_dlp/**.py" - "!yt_dlp/version.py" - - "setup.py" - - "pyinst.py" + - "bundle/*.py" + - "pyproject.toml" + - "Makefile" + - ".github/workflows/build.yml" concurrency: group: release-master permissions: diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml index 2e623a67c6..f459a3a17e 100644 --- a/.github/workflows/release-nightly.yml +++ b/.github/workflows/release-nightly.yml @@ -18,7 +18,14 @@ jobs: - name: Check for new commits id: check_for_new_commits run: | - relevant_files=("yt_dlp/*.py" ':!yt_dlp/version.py' "setup.py" "pyinst.py") + relevant_files=( + "yt_dlp/*.py" + ':!yt_dlp/version.py' + "bundle/*.py" + "pyproject.toml" + "Makefile" + ".github/workflows/build.yml" + ) echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT" release: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 69b5e31529..f5c6a793e1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -71,7 +71,7 @@ jobs: with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.10" @@ -246,15 +246,16 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 with: python-version: "3.10" - name: Install Requirements run: | sudo apt -y install pandoc man - python -m pip install -U pip setuptools wheel twine - python -m pip install -U -r requirements.txt + python devscripts/install_deps.py -o --include build - name: Prepare env: @@ -266,14 +267,19 @@ jobs: run: | python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}" python devscripts/make_lazy_extractors.py - sed -i -E "s/(name=')[^']+(', # package name)/\1${{ env.pypi_project }}\2/" setup.py + sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml - name: Build run: | rm -rf dist/* make pypi-files + printf '%s\n\n' \ + 'Official repository: ' \ + '**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new + cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" - python setup.py sdist bdist_wheel + make clean-cache + python -m build --no-isolation . - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 @@ -290,8 +296,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/download-artifact@v3 - - uses: actions/setup-python@v4 + - uses: actions/download-artifact@v4 + with: + path: artifact + pattern: build-* + merge-multiple: true + - uses: actions/setup-python@v5 with: python-version: "3.10" diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index bc2f056c05..0000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,10 +0,0 @@ -include AUTHORS -include Changelog.md -include LICENSE -include README.md -include completions/*/* -include supportedsites.md -include yt-dlp.1 -include requirements.txt -recursive-include devscripts * -recursive-include test * diff --git a/Makefile b/Makefile index c85b24c13e..c33984f6f7 100644 --- a/Makefile +++ b/Makefile @@ -6,11 +6,11 @@ doc: README.md CONTRIBUTING.md issuetemplates supportedsites ot: offlinetest tar: yt-dlp.tar.gz -# Keep this list in sync with MANIFEST.in +# Keep this list in sync with pyproject.toml includes/artifacts # intended use: when building a source distribution, -# make pypi-files && python setup.py sdist +# make pypi-files && python3 -m build -sn . pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ - completions yt-dlp.1 requirements.txt setup.cfg devscripts/* test/* + completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/* .PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites @@ -21,7 +21,7 @@ clean-test: *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ - yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap + yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS clean-cache: find . \( \ -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ @@ -38,11 +38,13 @@ MANDIR ?= $(PREFIX)/man SHAREDIR ?= $(PREFIX)/share PYTHON ?= /usr/bin/env python3 -# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) +# $(shell) and $(error) are no-ops in BSD Make and the != variable assignment operator is not supported by GNU Make <4.0 +VERSION_CHECK != echo supported +VERSION_CHECK ?= $(error GNU Make 4+ or BSD Make is required) +CHECK_VERSION := $(VERSION_CHECK) -# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 -MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) +# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2 +MARKDOWN != if [ "`pandoc -v | head -n1 | cut -d' ' -f2 | head -c1`" -ge "2" ]; then echo markdown-smart; else echo markdown; fi install: lazy-extractors yt-dlp yt-dlp.1 completions mkdir -p $(DESTDIR)$(BINDIR) @@ -73,24 +75,24 @@ test: offlinetest: codetest $(PYTHON) -m pytest -k "not download" -# XXX: This is hard to maintain -CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking -yt-dlp: yt_dlp/*.py yt_dlp/*/*.py +CODE_FOLDERS != find yt_dlp -type f -name '__init__.py' -exec dirname {} \+ | grep -v '/__' | sort +CODE_FILES != for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done +yt-dlp: $(CODE_FILES) mkdir -p zip for d in $(CODE_FOLDERS) ; do \ mkdir -p zip/$$d ;\ cp -pPR $$d/*.py zip/$$d/ ;\ done - touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py + (cd zip && touch -t 200001010101 $(CODE_FILES)) mv zip/yt_dlp/__main__.py zip/ - cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py + (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py) rm -rf zip echo '#!$(PYTHON)' > yt-dlp cat yt-dlp.zip >> yt-dlp rm yt-dlp.zip chmod a+x yt-dlp -README.md: yt_dlp/*.py yt_dlp/*/*.py devscripts/make_readme.py +README.md: $(CODE_FILES) devscripts/make_readme.py COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py CONTRIBUTING.md: README.md devscripts/make_contributing.py @@ -115,19 +117,19 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1 rm -f yt-dlp.1.temp.md -completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in +completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in mkdir -p completions/bash $(PYTHON) devscripts/bash-completion.py -completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in +completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in mkdir -p completions/zsh $(PYTHON) devscripts/zsh-completion.py -completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in +completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in mkdir -p completions/fish $(PYTHON) devscripts/fish-completion.py -_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py') +_EXTRACTOR_FILES != find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py' yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ @@ -141,15 +143,12 @@ yt-dlp.tar.gz: all --exclude '__pycache__' \ --exclude '.pytest_cache' \ --exclude '.git' \ + --exclude '__pyinstaller' \ -- \ README.md supportedsites.md Changelog.md LICENSE \ CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \ - Makefile MANIFEST.in yt-dlp.1 README.txt completions \ - setup.py setup.cfg yt-dlp yt_dlp requirements.txt \ - devscripts test + Makefile yt-dlp.1 README.txt completions .gitignore \ + setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test -AUTHORS: .mailmap - git shortlog -s -n | cut -f2 | sort > AUTHORS - -.mailmap: - git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap +AUTHORS: + git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS diff --git a/README.md b/README.md index 7dc3bb2f6c..2fcb099176 100644 --- a/README.md +++ b/README.md @@ -321,19 +321,21 @@ ### Deprecated ## COMPILE ### Standalone PyInstaller Builds -To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. +To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. You can run the following commands: - python3 -m pip install -U pyinstaller -r requirements.txt - python3 devscripts/make_lazy_extractors.py - python3 pyinst.py +``` +python3 devscripts/install_deps.py --include pyinstaller +python3 devscripts/make_lazy_extractors.py +python3 -m bundle.pyinstaller +``` On some systems, you may need to use `py` or `python` instead of `python3`. -`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). +`bundle/pyinstaller.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). **Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. -**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. +**Important**: Running `pyinstaller` directly **without** using `bundle/pyinstaller.py` is **not** officially supported. This may or may not work correctly. ### Platform-independent Binary (UNIX) You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. @@ -346,14 +348,17 @@ ### Standalone Py2Exe Builds (Windows) While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run. -If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe` +If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: - py -m pip install -U py2exe -r requirements.txt - py devscripts/make_lazy_extractors.py - py setup.py py2exe +``` +py devscripts/install_deps.py --include py2exe +py devscripts/make_lazy_extractors.py +py -m bundle.py2exe +``` ### Related scripts +* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. * **`devscripts/update-version.py`** - Update the version number based on current date. * **`devscripts/set-variant.py`** - Set the build variant of the executable. * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. diff --git a/bundle/__init__.py b/bundle/__init__.py new file mode 100644 index 0000000000..932b79829c --- /dev/null +++ b/bundle/__init__.py @@ -0,0 +1 @@ +# Empty file diff --git a/bundle/py2exe.py b/bundle/py2exe.py new file mode 100755 index 0000000000..a7e4113f1f --- /dev/null +++ b/bundle/py2exe.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import warnings + +from py2exe import freeze + +from devscripts.utils import read_version + +VERSION = read_version() + + +def main(): + warnings.warn( + 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' + 'It is recommended to run "pyinst.py" to build using pyinstaller instead') + + return freeze( + console=[{ + 'script': './yt_dlp/__main__.py', + 'dest_base': 'yt-dlp', + 'icon_resources': [(1, 'devscripts/logo.ico')], + }], + version_info={ + 'version': VERSION, + 'description': 'A youtube-dl fork with additional features and patches', + 'comments': 'Official repository: ', + 'product_name': 'yt-dlp', + 'product_version': VERSION, + }, + options={ + 'bundle_files': 0, + 'compressed': 1, + 'optimize': 2, + 'dist_dir': './dist', + 'excludes': [ + # py2exe cannot import Crypto + 'Crypto', + 'Cryptodome', + # py2exe appears to confuse this with our socks library. + # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. + 'urllib3.contrib.socks' + ], + 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], + # Modules that are only imported dynamically must be added here + 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated', + 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'], + }, + zipfile=None, + ) + + +if __name__ == '__main__': + main() diff --git a/pyinst.py b/bundle/pyinstaller.py old mode 100644 new mode 100755 similarity index 98% rename from pyinst.py rename to bundle/pyinstaller.py index c36f6acd4f..db9dbfde51 --- a/pyinst.py +++ b/bundle/pyinstaller.py @@ -4,7 +4,7 @@ import os import sys -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import platform diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py new file mode 100755 index 0000000000..715e5b0440 --- /dev/null +++ b/devscripts/install_deps.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import argparse +import re +import subprocess + +from devscripts.tomlparse import parse_toml +from devscripts.utils import read_file + + +def parse_args(): + parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp') + parser.add_argument( + 'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)') + parser.add_argument( + '-e', '--exclude', metavar='REQUIREMENT', action='append', help='Exclude a required dependency') + parser.add_argument( + '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group') + parser.add_argument( + '-o', '--only-optional', action='store_true', help='Only install optional dependencies') + parser.add_argument( + '-p', '--print', action='store_true', help='Only print a requirements.txt to stdout') + parser.add_argument( + '-u', '--user', action='store_true', help='Install with pip as --user') + return parser.parse_args() + + +def main(): + args = parse_args() + toml_data = parse_toml(read_file(args.input)) + deps = toml_data['project']['dependencies'] + targets = deps.copy() if not args.only_optional else [] + + for exclude in args.exclude or []: + for dep in deps: + simplified_dep = re.match(r'[\w-]+', dep)[0] + if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep): + targets.remove(dep) + + optional_deps = toml_data['project']['optional-dependencies'] + for include in args.include or []: + group = optional_deps.get(include) + if group: + targets.extend(group) + + if args.print: + for target in targets: + print(target) + return + + pip_args = [sys.executable, '-m', 'pip', 'install', '-U'] + if args.user: + pip_args.append('--user') + pip_args.extend(targets) + + return subprocess.call(pip_args) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py new file mode 100755 index 0000000000..85ac4eef78 --- /dev/null +++ b/devscripts/tomlparse.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +""" +Simple parser for spec compliant toml files + +A simple toml parser for files that comply with the spec. +Should only be used to parse `pyproject.toml` for `install_deps.py`. + +IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED! +""" + +from __future__ import annotations + +import datetime +import json +import re + +WS = r'(?:[\ \t]*)' +STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'') +SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+') +KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*') +EQUALS_RE = re.compile(rf'={WS}') +WS_RE = re.compile(WS) + +_SUBTABLE = rf'(?P^\[(?P\[)?(?P{KEY_RE.pattern})\]\]?)' +EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE) + +LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*') +LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+') + + +def parse_key(value: str): + for match in SINGLE_KEY_RE.finditer(value): + if match[0][0] == '"': + yield json.loads(match[0]) + elif match[0][0] == '\'': + yield match[0][1:-1] + else: + yield match[0] + + +def get_target(root: dict, paths: list[str], is_list=False): + target = root + + for index, key in enumerate(paths, 1): + use_list = is_list and index == len(paths) + result = target.get(key) + if result is None: + result = [] if use_list else {} + target[key] = result + + if isinstance(result, dict): + target = result + elif use_list: + target = {} + result.append(target) + else: + target = result[-1] + + assert isinstance(target, dict) + return target + + +def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + while data[index] != end: + index = yield True, index + + if match := ws_re.match(data, index): + index = match.end() + + if data[index] == ',': + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + assert data[index] == end + yield False, index + 1 + + +def parse_value(data: str, index: int): + if data[index] == '[': + result = [] + + indices = parse_enclosed(data, index, ']', LIST_WS_RE) + valid, index = next(indices) + while valid: + index, value = parse_value(data, index) + result.append(value) + valid, index = indices.send(index) + + return index, result + + if data[index] == '{': + result = {} + + indices = parse_enclosed(data, index, '}', WS_RE) + valid, index = next(indices) + while valid: + valid, index = indices.send(parse_kv_pair(data, index, result)) + + return index, result + + if match := STRING_RE.match(data, index): + return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] + + match = LEFTOVER_VALUE_RE.match(data, index) + assert match + value = match[0].strip() + for func in [ + int, + float, + datetime.time.fromisoformat, + datetime.date.fromisoformat, + datetime.datetime.fromisoformat, + {'true': True, 'false': False}.get, + ]: + try: + value = func(value) + break + except Exception: + pass + + return match.end(), value + + +def parse_kv_pair(data: str, index: int, target: dict): + match = KEY_RE.match(data, index) + if not match: + return None + + *keys, key = parse_key(match[0]) + + match = EQUALS_RE.match(data, match.end()) + assert match + index = match.end() + + index, value = parse_value(data, index) + get_target(target, keys)[key] = value + return index + + +def parse_toml(data: str): + root = {} + target = root + + index = 0 + while True: + match = EXPRESSION_RE.search(data, index) + if not match: + break + + if match.group('subtable'): + index = match.end() + path, is_list = match.group('path', 'is_list') + target = get_target(root, list(parse_key(path)), bool(is_list)) + continue + + index = parse_kv_pair(data, match.start(), target) + assert index is not None + + return root + + +def main(): + import argparse + from pathlib import Path + + parser = argparse.ArgumentParser() + parser.add_argument('infile', type=Path, help='The TOML file to read as input') + args = parser.parse_args() + + with args.infile.open('r', encoding='utf-8') as file: + data = file.read() + + def default(obj): + if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)): + return obj.isoformat() + + print(json.dumps(parse_toml(data), default=default)) + + +if __name__ == '__main__': + main() diff --git a/pyproject.toml b/pyproject.toml index 97718ec431..5ef013279a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,120 @@ [build-system] -build-backend = 'setuptools.build_meta' -# https://github.com/yt-dlp/yt-dlp/issues/5941 -# https://github.com/pypa/distutils/issues/17 -requires = ['setuptools > 50'] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "yt-dlp" +maintainers = [ + {name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"}, + {name = "Grub4K", email = "contact@grub4k.xyz"}, + {name = "bashonly", email = "bashonly@protonmail.com"}, +] +description = "A youtube-dl fork with additional features and patches" +readme = "README.md" +requires-python = ">=3.8" +keywords = [ + "youtube-dl", + "video-downloader", + "youtube-downloader", + "sponsorblock", + "youtube-dlc", + "yt-dlp", +] +license = {file = "LICENSE"} +classifiers = [ + "Topic :: Multimedia :: Video", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "License :: OSI Approved :: The Unlicense (Unlicense)", + "Operating System :: OS Independent", +] +dynamic = ["version"] +dependencies = [ + "brotli; implementation_name=='cpython'", + "brotlicffi; implementation_name!='cpython'", + "certifi", + "mutagen", + "pycryptodomex", + "requests>=2.31.0,<3", + "urllib3>=1.26.17,<3", + "websockets>=12.0", +] + +[project.optional-dependencies] +secretstorage = [ + "cffi", + "secretstorage", +] +build = [ + "build", + "hatchling", + "pip", + "wheel", +] +dev = [ + "flake8", + "isort", + "pytest", +] +pyinstaller = ["pyinstaller>=6.3"] +py2exe = ["py2exe>=0.12"] + +[project.urls] +Documentation = "https://github.com/yt-dlp/yt-dlp#readme" +Repository = "https://github.com/yt-dlp/yt-dlp" +Tracker = "https://github.com/yt-dlp/yt-dlp/issues" +Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators" + +[project.scripts] +yt-dlp = "yt_dlp:main" + +[project.entry-points.pyinstaller40] +hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs" + +[tool.hatch.build.targets.sdist] +include = [ + "/yt_dlp", + "/devscripts", + "/test", + "/.gitignore", # included by default, needed for auto-excludes + "/Changelog.md", + "/LICENSE", # included as license + "/pyproject.toml", # included by default + "/README.md", # included as readme + "/setup.cfg", + "/supportedsites.md", +] +exclude = ["/yt_dlp/__pyinstaller"] +artifacts = [ + "/yt_dlp/extractor/lazy_extractors.py", + "/completions", + "/AUTHORS", # included by default + "/README.txt", + "/yt-dlp.1", +] + +[tool.hatch.build.targets.wheel] +packages = ["yt_dlp"] +exclude = ["/yt_dlp/__pyinstaller"] +artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] + +[tool.hatch.build.targets.wheel.shared-data] +"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp" +"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp" +"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish" +"README.txt" = "share/doc/yt_dlp/README.txt" +"yt-dlp.1" = "share/man/man1/yt-dlp.1" + +[tool.hatch.version] +path = "yt_dlp/version.py" +pattern = "_pkg_version = '(?P[^']+)'" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 06ff82a800..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -mutagen -pycryptodomex -brotli; implementation_name=='cpython' -brotlicffi; implementation_name!='cpython' -certifi -requests>=2.31.0,<3 -urllib3>=1.26.17,<3 -websockets>=12.0 diff --git a/setup.cfg b/setup.cfg index a799f7293e..aeb4cee586 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,3 @@ -[wheel] -universal = true - - [flake8] exclude = build,venv,.tox,.git,.pytest_cache ignore = E402,E501,E731,E741,W503 diff --git a/setup.py b/setup.py deleted file mode 100644 index 3d9a69d10c..0000000000 --- a/setup.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import subprocess -import warnings - -try: - from setuptools import Command, find_packages, setup - setuptools_available = True -except ImportError: - from distutils.core import Command, setup - setuptools_available = False - -from devscripts.utils import read_file, read_version - -VERSION = read_version(varname='_pkg_version') - -DESCRIPTION = 'A youtube-dl fork with additional features and patches' - -LONG_DESCRIPTION = '\n\n'.join(( - 'Official repository: ', - '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', - read_file('README.md'))) - -REQUIREMENTS = read_file('requirements.txt').splitlines() - - -def packages(): - if setuptools_available: - return find_packages(exclude=('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts')) - - return [ - 'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat', - ] - - -def py2exe_params(): - warnings.warn( - 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' - 'It is recommended to run "pyinst.py" to build using pyinstaller instead') - - return { - 'console': [{ - 'script': './yt_dlp/__main__.py', - 'dest_base': 'yt-dlp', - 'icon_resources': [(1, 'devscripts/logo.ico')], - }], - 'version_info': { - 'version': VERSION, - 'description': DESCRIPTION, - 'comments': LONG_DESCRIPTION.split('\n')[0], - 'product_name': 'yt-dlp', - 'product_version': VERSION, - }, - 'options': { - 'bundle_files': 0, - 'compressed': 1, - 'optimize': 2, - 'dist_dir': './dist', - 'excludes': [ - # py2exe cannot import Crypto - 'Crypto', - 'Cryptodome', - # py2exe appears to confuse this with our socks library. - # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. - 'urllib3.contrib.socks' - ], - 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], - # Modules that are only imported dynamically must be added here - 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated', - 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'], - }, - 'zipfile': None, - } - - -def build_params(): - files_spec = [ - ('share/bash-completion/completions', ['completions/bash/yt-dlp']), - ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']), - ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']), - ('share/doc/yt_dlp', ['README.txt']), - ('share/man/man1', ['yt-dlp.1']) - ] - data_files = [] - for dirname, files in files_spec: - resfiles = [] - for fn in files: - if not os.path.exists(fn): - warnings.warn(f'Skipping file {fn} since it is not present. Try running " make pypi-files " first') - else: - resfiles.append(fn) - data_files.append((dirname, resfiles)) - - params = {'data_files': data_files} - - if setuptools_available: - params['entry_points'] = { - 'console_scripts': ['yt-dlp = yt_dlp:main'], - 'pyinstaller40': ['hook-dirs = yt_dlp.__pyinstaller:get_hook_dirs'], - } - else: - params['scripts'] = ['yt-dlp'] - return params - - -class build_lazy_extractors(Command): - description = 'Build the extractor lazy loading module' - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - if self.dry_run: - print('Skipping build of lazy extractors in dry run mode') - return - subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py']) - - -def main(): - if sys.argv[1:2] == ['py2exe']: - params = py2exe_params() - try: - from py2exe import freeze - except ImportError: - import py2exe # noqa: F401 - warnings.warn('You are using an outdated version of py2exe. Support for this version will be removed in the future') - params['console'][0].update(params.pop('version_info')) - params['options'] = {'py2exe': params.pop('options')} - else: - return freeze(**params) - else: - params = build_params() - - setup( - name='yt-dlp', # package name (do not change/remove comment) - version=VERSION, - maintainer='pukkandan', - maintainer_email='pukkandan.ytdlp@gmail.com', - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, - long_description_content_type='text/markdown', - url='https://github.com/yt-dlp/yt-dlp', - packages=packages(), - install_requires=REQUIREMENTS, - python_requires='>=3.8', - project_urls={ - 'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme', - 'Source': 'https://github.com/yt-dlp/yt-dlp', - 'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues', - 'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators', - }, - classifiers=[ - 'Topic :: Multimedia :: Video', - 'Development Status :: 5 - Production/Stable', - 'Environment :: Console', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - 'Programming Language :: Python :: Implementation', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'License :: Public Domain', - 'Operating System :: OS Independent', - ], - cmdclass={'build_lazy_extractors': build_lazy_extractors}, - **params - ) - - -main() diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index f8488d3041..5d1dd60386 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -257,6 +257,7 @@ from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE +from .boosty import BoostyIE from .bostonglobe import BostonGlobeIE from .box import BoxIE from .boxcast import BoxCastVideoIE @@ -1247,7 +1248,10 @@ NexxIE, NexxEmbedIE, ) -from .nfb import NFBIE +from .nfb import ( + NFBIE, + NFBSeriesIE, +) from .nfhsnetwork import NFHSNetworkIE from .nfl import ( NFLIE, @@ -1284,6 +1288,7 @@ NicovideoTagURLIE, NiconicoLiveIE, ) +from .ninaprotocol import NinaProtocolIE from .ninecninemedia import ( NineCNineMediaIE, CPTwentyFourIE, @@ -1348,6 +1353,12 @@ NYTimesIE, NYTimesArticleIE, NYTimesCookingIE, + NYTimesCookingRecipeIE, +) +from .nuum import ( + NuumLiveIE, + NuumTabIE, + NuumMediaIE, ) from .nuvid import NuvidIE from .nzherald import NZHeraldIE @@ -1390,6 +1401,7 @@ from .orf import ( ORFTVthekIE, ORFFM4StoryIE, + ORFONIE, ORFRadioIE, ORFPodcastIE, ORFIPTVIE, @@ -1514,7 +1526,7 @@ PuhuTVSerieIE, ) from .pr0gramm import Pr0grammIE -from .prankcast import PrankCastIE +from .prankcast import PrankCastIE, PrankCastPostIE from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE @@ -2309,11 +2321,6 @@ WashingtonPostIE, WashingtonPostArticleIE, ) -from .wasdtv import ( - WASDTVStreamIE, - WASDTVRecordIE, - WASDTVClipIE, -) from .wat import WatIE from .wdr import ( WDRIE, @@ -2492,6 +2499,7 @@ Zee5SeriesIE, ) from .zeenews import ZeeNewsIE +from .zetland import ZetlandDKArticleIE from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3IE, diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py index c78717aa9e..17a4b6900d 100644 --- a/yt_dlp/extractor/antenna.py +++ b/yt_dlp/extractor/antenna.py @@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE): _TESTS = [{ 'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', - 'md5': '294f18331bb516539d72d85a82887dcc', + 'md5': '57eb8d12181f0fa2b14b0b138e1de9b6', 'info_dict': { 'id': '_xvg/m_cmbatw=', 'ext': 'mp4', 'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', - 'timestamp': 1603092840, - 'upload_date': '20201019', - 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg', + 'timestamp': 1666166520, + 'upload_date': '20221019', + 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg', }, }, { 'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', @@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE): _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P[^#&]+)' _EMBED_REGEX = [rf']+?src=(?P<_q1>["\'])(?P{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] - _API_PATH = '/news/templates/data/jsonPlayer' + _API_PATH = '/templates/data/jsonPlayer' _TESTS = [{ 'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index cd7df69ef0..c138bde3a5 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -7,6 +7,7 @@ import re import time import urllib.parse +import uuid from .common import InfoExtractor, SearchInfoExtractor from ..dependencies import Cryptodome @@ -1304,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'upload_date': '20211127', }, 'playlist_mincount': 513, + }, { + 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz', + 'info_dict': { + 'id': 'BV1DU4y1r7tz', + 'ext': 'mp4', + 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场', + 'upload_date': '20220820', + 'description': '', + 'timestamp': 1661016330, + 'uploader_id': '1958703906', + 'uploader': '靡烟miya', + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'duration': 9552.903, + 'tags': list, + 'comment_count': int, + 'view_count': int, + 'like_count': int, + '_old_archive_ids': ['bilibili 687146339_part1'], + }, + 'params': {'noplaylist': True}, }, { 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1', 'info_dict': { @@ -1355,6 +1376,11 @@ def _extract_medialist(self, query, list_id): def _real_extract(self, url): list_id = self._match_id(url) + + bvid = traverse_obj(parse_qs(url), ('bvid', 0)) + if not self._yes_playlist(list_id, bvid): + return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE) + webpage = self._download_webpage(url, list_id) initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id) if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200: @@ -1464,8 +1490,37 @@ class BiliBiliSearchIE(SearchInfoExtractor): IE_DESC = 'Bilibili video search' _MAX_RESULTS = 100000 _SEARCH_KEY = 'bilisearch' + _TESTS = [{ + 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊', + 'playlist_count': 3, + 'info_dict': { + 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊', + 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'BV1n44y1Q7sc', + 'ext': 'mp4', + 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】', + 'timestamp': 1669889987, + 'upload_date': '20221201', + 'description': 'md5:43343c0973defff527b5a4b403b4abf9', + 'tags': list, + 'uploader': '靡烟miya', + 'duration': 123.156, + 'uploader_id': '1958703906', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 988222410_part1'], + }, + }], + }] def _search_results(self, query): + if not self._get_cookies('https://api.bilibili.com').get('buvid3'): + self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc') for page_num in itertools.count(1): videos = self._download_json( 'https://api.bilibili.com/x/web-interface/search/type', query, diff --git a/yt_dlp/extractor/boosty.py b/yt_dlp/extractor/boosty.py new file mode 100644 index 0000000000..fb14ca1467 --- /dev/null +++ b/yt_dlp/extractor/boosty.py @@ -0,0 +1,209 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class BoostyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P[^/#?]+)/posts/(?P[^/#?]+)' + _TESTS = [{ + # single ok_video + 'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38', + 'info_dict': { + 'id': 'd7473824-352e-48e2-ae53-d4aa39459968', + 'title': 'phasma_3', + 'channel': 'Kuplinov', + 'channel_id': '7958701', + 'timestamp': 1655031975, + 'upload_date': '20220612', + 'release_timestamp': 1655049000, + 'release_date': '20220612', + 'modified_timestamp': 1668680993, + 'modified_date': '20221117', + 'tags': ['куплинов', 'phasmophobia'], + 'like_count': int, + 'ext': 'mp4', + 'duration': 105, + 'view_count': int, + 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?', + }, + }, { + # multiple ok_video + 'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f', + 'info_dict': { + 'id': '0c652798-3b35-471f-8b48-a76a0b28736f', + 'title': 'то что не пропустил юта6', + 'channel': 'Илья Давыдов', + 'channel_id': '6808257', + 'timestamp': 1694017040, + 'upload_date': '20230906', + 'release_timestamp': 1694017040, + 'release_date': '20230906', + 'modified_timestamp': 1694071178, + 'modified_date': '20230907', + 'like_count': int, + }, + 'playlist_count': 3, + 'playlist': [{ + 'info_dict': { + 'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a', + 'title': 'то что не пропустил юта6', + 'channel': 'Илья Давыдов', + 'channel_id': '6808257', + 'timestamp': 1694017040, + 'upload_date': '20230906', + 'release_timestamp': 1694017040, + 'release_date': '20230906', + 'modified_timestamp': 1694071178, + 'modified_date': '20230907', + 'like_count': int, + 'ext': 'mp4', + 'duration': 31204, + 'view_count': int, + 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?', + }, + }, { + 'info_dict': { + 'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7', + 'title': 'то что не пропустил юта6', + 'channel': 'Илья Давыдов', + 'channel_id': '6808257', + 'timestamp': 1694017040, + 'upload_date': '20230906', + 'release_timestamp': 1694017040, + 'release_date': '20230906', + 'modified_timestamp': 1694071178, + 'modified_date': '20230907', + 'like_count': int, + 'ext': 'mp4', + 'duration': 25704, + 'view_count': int, + 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?', + }, + }, { + 'info_dict': { + 'id': '4a3bba32-78c8-422a-9432-2791aff60b42', + 'title': 'то что не пропустил юта6', + 'channel': 'Илья Давыдов', + 'channel_id': '6808257', + 'timestamp': 1694017040, + 'upload_date': '20230906', + 'release_timestamp': 1694017040, + 'release_date': '20230906', + 'modified_timestamp': 1694071178, + 'modified_date': '20230907', + 'like_count': int, + 'ext': 'mp4', + 'duration': 31867, + 'view_count': int, + 'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?', + }, + }], + }, { + # single external video (youtube) + 'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38', + 'info_dict': { + 'id': 'EXelTnve5lY', + 'title': 'Послание Президента Федеральному Собранию | Класс народа', + 'upload_date': '20210425', + 'channel': 'Денис Чужой', + 'tags': 'count:10', + 'like_count': int, + 'ext': 'mp4', + 'duration': 816, + 'view_count': int, + 'thumbnail': r're:^https://i\.ytimg\.com/', + 'age_limit': 0, + 'availability': 'public', + 'categories': list, + 'channel_follower_count': int, + 'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w', + 'channel_is_verified': bool, + 'channel_url': r're:^https://www\.youtube\.com/', + 'comment_count': int, + 'description': str, + 'heatmap': 'count:100', + 'live_status': str, + 'playable_in_embed': bool, + 'uploader': str, + 'uploader_id': str, + 'uploader_url': r're:^https://www\.youtube\.com/', + }, + }] + + _MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd') + + def _extract_formats(self, player_urls, video_id): + formats = [] + quality = qualities(self._MP4_TYPES) + for player_url in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])): + url = player_url['url'] + format_type = player_url.get('type') + if format_type in ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls'): + formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id='hls', fatal=False)) + elif format_type in ('dash', 'dash_live', 'live_playback_dash'): + formats.extend(self._extract_mpd_formats(url, video_id, mpd_id='dash', fatal=False)) + elif format_type in self._MP4_TYPES: + formats.append({ + 'url': url, + 'ext': 'mp4', + 'format_id': format_type, + 'quality': quality(format_type), + }) + else: + self.report_warning(f'Unknown format type: {format_type!r}') + return formats + + def _real_extract(self, url): + user, post_id = self._match_valid_url(url).group('user', 'post_id') + post = self._download_json( + f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id, + note='Downloading post data', errnote='Unable to download post data') + + post_title = post.get('title') + if not post_title: + self.report_warning('Unable to extract post title. Falling back to parsing html page') + webpage = self._download_webpage(url, video_id=post_id) + post_title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage) + + common_metadata = { + 'title': post_title, + **traverse_obj(post, { + 'channel': ('user', 'name', {str}), + 'channel_id': ('user', 'id', {str_or_none}), + 'timestamp': ('createdAt', {int_or_none}), + 'release_timestamp': ('publishTime', {int_or_none}), + 'modified_timestamp': ('updatedAt', {int_or_none}), + 'tags': ('tags', ..., 'title', {str}), + 'like_count': ('count', 'likes', {int_or_none}), + }), + } + entries = [] + for item in traverse_obj(post, ('data', ..., {dict})): + item_type = item.get('type') + if item_type == 'video' and url_or_none(item.get('url')): + entries.append(self.url_result(item['url'], YoutubeIE)) + elif item_type == 'ok_video': + video_id = item.get('id') or post_id + entries.append({ + 'id': video_id, + 'formats': self._extract_formats(item.get('playerUrls'), video_id), + **common_metadata, + **traverse_obj(item, { + 'title': ('title', {str}), + 'duration': ('duration', {int_or_none}), + 'view_count': ('viewsCounter', {int_or_none}), + 'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}), + }, get_all=False)}) + + if not entries: + raise ExtractorError('No videos found', expected=True) + if len(entries) == 1: + return entries[0] + return self.playlist_result(entries, post_id, post_title, **common_metadata) diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index 88ff82f6e6..ab840f3016 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -1,6 +1,7 @@ from .common import InfoExtractor from ..utils import ( clean_html, + determine_ext, int_or_none, parse_duration, parse_resolution, @@ -60,6 +61,7 @@ def _real_extract(self, url): 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'media': media_type, 'idint': media_id, + 'format': 'dm', }) formats = [] @@ -69,6 +71,10 @@ def _real_extract(self, url): format_url = url_or_none(format_.get('file')) if not format_url: continue + if determine_ext(format_url) == 'mpd': + formats.extend(self._extract_mpd_formats( + format_url, media_id, mpd_id='dash', fatal=False)) + continue label = format_.get('label') f = parse_resolution(label) f.update({ diff --git a/yt_dlp/extractor/cineverse.py b/yt_dlp/extractor/cineverse.py index c9fa789b78..032c4334b1 100644 --- a/yt_dlp/extractor/cineverse.py +++ b/yt_dlp/extractor/cineverse.py @@ -67,7 +67,10 @@ def _real_extract(self, url): html = self._download_webpage(url, video_id) idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails'] - if idetails.get('err_code') == 1200: + err_code = idetails.get('err_code') + if err_code == 1002: + self.raise_login_required() + elif err_code == 1200: self.raise_geo_restricted( 'This video is not available from your location due to geo restriction. ' 'You may be able to bypass it by using the /details/ page instead of the /watch/ page', diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index 4de7e3d530..2ee0730c99 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -33,10 +33,7 @@ def _real_extract(self, url): webpage = self._download_webpage( 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) - manifest = self._parse_json( - self._search_regex( - r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'), - video_id) + manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id) quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py index aee2dee581..b18a76c7c1 100644 --- a/yt_dlp/extractor/eporner.py +++ b/yt_dlp/extractor/eporner.py @@ -1,8 +1,10 @@ from .common import InfoExtractor from ..utils import ( - encode_base_n, ExtractorError, + encode_base_n, + get_elements_by_class, int_or_none, + join_nonempty, merge_dicts, parse_duration, str_to_int, @@ -81,6 +83,7 @@ def calc_hash(s): sources = video['sources'] formats = [] + has_av1 = bool(get_elements_by_class('download-av1', webpage)) for kind, formats_dict in sources.items(): if not isinstance(formats_dict, dict): continue @@ -106,6 +109,14 @@ def calc_hash(s): 'height': height, 'fps': fps, }) + if has_av1: + formats.append({ + 'url': src.replace('.mp4', '-av1.mp4'), + 'format_id': join_nonempty('av1', format_id), + 'height': height, + 'fps': fps, + 'vcodec': 'av1', + }) json_ld = self._search_json_ld(webpage, display_id, default={}) diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py index 539d719c5b..8bdea3fce7 100644 --- a/yt_dlp/extractor/funk.py +++ b/yt_dlp/extractor/funk.py @@ -1,25 +1,29 @@ from .common import InfoExtractor from .nexx import NexxIE -from ..utils import ( - int_or_none, - str_or_none, -) class FunkIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', - 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', + 'md5': '8610449476156f338761a75391b0017d', 'info_dict': { 'id': '1155821', 'ext': 'mp4', 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2', - 'description': 'md5:a691d0413ef4835588c5b03ded670c1f', + 'description': 'md5:2a03b67596eda0d1b5125c299f45e953', 'timestamp': 1514507395, 'upload_date': '20171229', + 'duration': 426.0, + 'cast': ['United Creators PMB GmbH'], + 'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg', + 'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2', + 'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2', + 'season_number': 0, + 'season': 'Season 0', + 'episode_number': 0, + 'episode': 'Episode 0', }, - }, { 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, @@ -27,18 +31,10 @@ class FunkIE(InfoExtractor): def _real_extract(self, url): display_id, nexx_id = self._match_valid_url(url).groups() - video = self._download_json( - 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) return { '_type': 'url_transparent', - 'url': 'nexx:741:' + nexx_id, + 'url': f'nexx:741:{nexx_id}', 'ie_key': NexxIE.ie_key(), 'id': nexx_id, - 'title': video.get('title'), - 'description': video.get('description'), - 'duration': int_or_none(video.get('duration')), - 'channel_id': str_or_none(video.get('channelId')), 'display_id': display_id, - 'tags': video.get('tags'), - 'thumbnail': video.get('imageUrlLandscape'), } diff --git a/yt_dlp/extractor/lefigaro.py b/yt_dlp/extractor/lefigaro.py index 9465095db4..a452d87062 100644 --- a/yt_dlp/extractor/lefigaro.py +++ b/yt_dlp/extractor/lefigaro.py @@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor): _TESTS = [{ 'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/', - 'md5': 'e94de44cd80818084352fcf8de1ce82c', + 'md5': 'a0c3069b7e4c4526abf0053a7713f56f', 'info_dict': { 'id': 'g9j7Eovo', 'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées', @@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor): }, }, { 'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/', - 'md5': '0b3f10332b812034b3a3eda1ef877c5f', + 'md5': '319c662943dd777bab835cae1e2d73a5', 'info_dict': { 'id': 'LeAgybyc', 'title': 'Intelligence artificielle : faut-il s’en méfier ?', @@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor): _WEBPAGE_TESTS = [{ 'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/', - 'md5': '3972ddf2d5f8b98699f191687258e2f9', + 'md5': '6289f9489efb969e38245f31721596fe', 'info_dict': { 'id': 'QChnbPYA', 'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International', @@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor): }, }, { 'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/', - 'md5': '3ac0a0769546ee6be41ab52caea5d9a9', + 'md5': 'f6df814cae53e85937621599d2967520', 'info_dict': { 'id': 'QJzqoNbf', 'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live', @@ -73,7 +73,8 @@ def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData'] + player_data = self._search_nextjs_data( + webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData'] return self.url_result( f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'), diff --git a/yt_dlp/extractor/magellantv.py b/yt_dlp/extractor/magellantv.py index 0947a450a6..6f2524ba22 100644 --- a/yt_dlp/extractor/magellantv.py +++ b/yt_dlp/extractor/magellantv.py @@ -28,12 +28,24 @@ class MagellanTVIE(InfoExtractor): 'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'], }, 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.magellantv.com/watch/celebration-nation', + 'info_dict': { + 'id': 'celebration-nation', + 'ext': 'mp4', + 'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'], + 'duration': 2640.0, + 'title': 'Ancestors', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail'] + data = traverse_obj(self._search_nextjs_data(webpage, video_id), ( + 'props', 'pageProps', 'reactContext', + (('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False) formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id) return { diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py index 38e068af41..6f78728253 100644 --- a/yt_dlp/extractor/nfb.py +++ b/yt_dlp/extractor/nfb.py @@ -1,10 +1,54 @@ from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + join_nonempty, + merge_dicts, + parse_count, + url_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj -class NFBIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P[^/?#&]+)' +class NFBBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https?://(?:www\.)?(?Pnfb|onf)\.ca' + _GEO_COUNTRIES = ['CA'] + + def _extract_ep_data(self, webpage, video_id, fatal=False): + return self._search_json( + r'const\s+episodesData\s*=', webpage, 'episode data', video_id, + contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or [] + + def _extract_ep_info(self, data, video_id, slug=None): + info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], { + 'description': ('description', {str}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + 'uploader': ('data_layer', 'episodeMaker', {str}), + 'release_year': ('data_layer', 'episodeYear', {int_or_none}), + 'episode': ('data_layer', 'episodeTitle', {str}), + 'season': ('data_layer', 'seasonTitle', {str}), + 'season_number': ('data_layer', 'seasonTitle', {parse_count}), + 'series': ('data_layer', 'seriesTitle', {str}), + }), get_all=False) + + return { + **info, + 'id': video_id, + 'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '), + 'episode_number': int_or_none(self._search_regex( + r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)), + } + + +class NFBIE(NFBBaseIE): + IE_NAME = 'nfb' + IE_DESC = 'nfb.ca and onf.ca films and episodes' + _VALID_URL = [ + rf'{NFBBaseIE._VALID_URL_BASE}/(?Pfilm)/(?P[^/?#&]+)', + rf'{NFBBaseIE._VALID_URL_BASE}/(?Pseries?)/(?P[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)', + ] _TESTS = [{ + 'note': 'NFB film', 'url': 'https://www.nfb.ca/film/trafficopter/', 'info_dict': { 'id': 'trafficopter', @@ -14,29 +58,192 @@ class NFBIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Barrie Howells', 'release_year': 1972, + 'duration': 600.0, }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'ONF film', + 'url': 'https://www.onf.ca/film/mal-du-siecle/', + 'info_dict': { + 'id': 'mal-du-siecle', + 'ext': 'mp4', + 'title': 'Le mal du siècle', + 'description': 'md5:1abf774d77569ebe603419f2d344102b', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Catherine Lepage', + 'release_year': 2019, + 'duration': 300.0, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'NFB episode with English title', + 'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/', + 'info_dict': { + 'id': 'true-north-episode9-true-north-finale-making-it', + 'ext': 'mp4', + 'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It', + 'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.', + 'series': 'True North: Inside the Rise of Toronto Basketball', + 'release_year': 2018, + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Finale: Making It', + 'episode_number': 9, + 'uploader': 'Ryan Sidhoo', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'ONF episode with French title', + 'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/', + 'info_dict': { + 'id': 'direction-nord-episode-9', + 'ext': 'mp4', + 'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir', + 'description': 'md5:349a57419b71432b97bf6083d92b029d', + 'series': 'Direction nord – La montée du basketball à Toronto', + 'release_year': 2018, + 'season': 'Saison 1', + 'season_number': 1, + 'episode': 'Finale : Réussir', + 'episode_number': 9, + 'uploader': 'Ryan Sidhoo', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'NFB episode with French title (needs geo-bypass)', + 'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/', + 'info_dict': { + 'id': 'etoile-du-nord-episode-1-lobservation', + 'ext': 'mp4', + 'title': 'Étoile du Nord - L\'observation', + 'description': 'md5:161a4617260dee3de70f509b2c9dd21b', + 'series': 'Étoile du Nord', + 'release_year': 2023, + 'season': 'Saison 1', + 'season_number': 1, + 'episode': 'L\'observation', + 'episode_number': 1, + 'uploader': 'Patrick Bossé', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'ONF episode with English title (needs geo-bypass)', + 'url': 'https://www.onf.ca/serie/north-star/season1/episode1/', + 'info_dict': { + 'id': 'north-star-episode-1-observation', + 'ext': 'mp4', + 'title': 'North Star - Observation', + 'description': 'md5:c727f370839d8a817392b9e3f23655c7', + 'series': 'North Star', + 'release_year': 2023, + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Observation', + 'episode_number': 1, + 'uploader': 'Patrick Bossé', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)', + 'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/', + 'info_dict': { + 'id': 'north-star-episode-1-observation', + 'ext': 'mp4', + 'title': 'North Star - Observation', + 'description': 'md5:c727f370839d8a817392b9e3f23655c7', + 'series': 'North Star', + 'release_year': 2023, + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Observation', + 'episode_number': 1, + 'uploader': 'Patrick Bossé', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)', + 'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/', + 'info_dict': { + 'id': 'etoile-du-nord-episode-1-lobservation', + 'ext': 'mp4', + 'title': 'Étoile du Nord - L\'observation', + 'description': 'md5:161a4617260dee3de70f509b2c9dd21b', + 'series': 'Étoile du Nord', + 'release_year': 2023, + 'season': 'Saison 1', + 'season_number': 1, + 'episode': 'L\'observation', + 'episode_number': 1, + 'uploader': 'Patrick Bossé', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'Season 2 episode w/o episode num in id, extract from json ld', + 'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours', + 'info_dict': { + 'id': 'liste-des-choses-qui-existent-saison-2-ours', + 'ext': 'mp4', + 'title': 'La liste des choses qui existent - L\'ours en peluche', + 'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a', + 'series': 'La liste des choses qui existent', + 'release_year': 2022, + 'season': 'Saison 2', + 'season_number': 2, + 'episode': 'L\'ours en peluche', + 'episode_number': 12, + 'uploader': 'Francis Papillon', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'NFB film /embed/player/ page', + 'url': 'https://www.nfb.ca/film/afterlife/embed/player/', + 'info_dict': { + 'id': 'afterlife', + 'ext': 'mp4', + 'title': 'Afterlife', + 'description': 'md5:84951394f594f1fb1e62d9c43242fdf5', + 'release_year': 1978, + 'duration': 420.0, + 'uploader': 'Ishu Patel', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): - video_id = self._match_id(url) + site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id') + # Need to construct the URL since we match /embed/player/ URLs as well + webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug) + # type_ can change from film to serie(s) after redirect; new slug may have episode number + type_, slug = self._match_valid_url(urlh.url).group('type', 'id') - webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id) + embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex( + r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url')) + video_id = self._match_id(embed_url) # embed url has unique slug + player = self._download_webpage(embed_url, video_id, 'Downloading player page') + if 'MESSAGE_GEOBLOCKED' in player: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - iframe = self._html_search_regex( - r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)', - webpage, 'iframe', default=None, fatal=True) - if iframe.startswith('/'): - iframe = f'https://www.nfb.ca{iframe}' + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'), + video_id, 'mp4', m3u8_id='hls') - player = self._download_webpage(iframe, video_id) + if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False) + for fmt in fmts: + fmt['format_note'] = 'described video' + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) - source = self._html_search_regex( - r'source:\s*\'([^\']+)', - player, 'source', default=None, fatal=True) - - formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4') - - return { + info = { 'id': video_id, 'title': self._html_search_regex( r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*]*>\s*([^<]+?)\s*', @@ -45,14 +252,49 @@ def _real_extract(self, url): r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*]*>\s*([^<]+)', webpage, 'description', default=None), 'thumbnail': self._html_search_regex( - r'poster:\s*\'([^\']+)', - player, 'thumbnail', default=None), + r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None), 'uploader': self._html_search_regex( - r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', - webpage, 'uploader', default=None), + r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None), 'release_year': int_or_none(self._html_search_regex( r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)', webpage, 'release_year', default=None)), + } if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id) + + return merge_dicts({ 'formats': formats, 'subtitles': subtitles, - } + }, info, self._search_json_ld(webpage, video_id, default={})) + + +class NFBSeriesIE(NFBBaseIE): + IE_NAME = 'nfb:series' + IE_DESC = 'nfb.ca and onf.ca series' + _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?Pseries?)/(?P[^/?#&]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/', + 'playlist_mincount': 9, + 'info_dict': { + 'id': 'true-north-inside-the-rise-of-toronto-basketball', + }, + }, { + 'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/', + 'playlist_mincount': 26, + 'info_dict': { + 'id': 'la-liste-des-choses-qui-existent-serie', + }, + }] + + def _entries(self, episodes): + for episode in traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url'])): + mobj = NFBIE._match_valid_url(episode['embed_url']) + yield self.url_result( + mobj[0], NFBIE, **self._extract_ep_info([episode], mobj.group('id'))) + + def _real_extract(self, url): + site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id') + season_path = 'saison' if type_ == 'serie' else 'season' + webpage = self._download_webpage( + f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1', series_id) + episodes = self._extract_ep_data(webpage, series_id, fatal=True) + + return self.playlist_result(self._entries(episodes), series_id) diff --git a/yt_dlp/extractor/ninaprotocol.py b/yt_dlp/extractor/ninaprotocol.py new file mode 100644 index 0000000000..ea57c5f383 --- /dev/null +++ b/yt_dlp/extractor/ninaprotocol.py @@ -0,0 +1,225 @@ +from .common import InfoExtractor +from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj + + +class NinaProtocolIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P[^/#?]+)' + _TESTS = [{ + 'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ', + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ', + 'title': 'The Spatulas - March Chant', + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + 'channel': 'ppm', + 'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db', + 'album': 'The Spatulas - March Chant', + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'timestamp': 1701417610, + 'uploader': 'ppmrecs', + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'display_id': 'the-spatulas-march-chant', + 'upload_date': '20231201', + 'album_artist': 'Post Present Medium ', + }, + 'playlist': [{ + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1', + 'title': 'March Chant In April', + 'track': 'March Chant In April', + 'ext': 'mp3', + 'duration': 152, + 'track_number': 1, + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + 'uploader': 'ppmrecs', + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'timestamp': 1701417610, + 'channel': 'ppm', + 'album': 'The Spatulas - March Chant', + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'upload_date': '20231201', + 'album_artist': 'Post Present Medium ', + } + }, { + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2', + 'title': 'Rescue Mission', + 'track': 'Rescue Mission', + 'ext': 'mp3', + 'duration': 212, + 'track_number': 2, + 'album_artist': 'Post Present Medium ', + 'uploader': 'ppmrecs', + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'channel': 'ppm', + 'upload_date': '20231201', + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'timestamp': 1701417610, + 'album': 'The Spatulas - March Chant', + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + } + }, { + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3', + 'title': 'Slinger Style', + 'track': 'Slinger Style', + 'ext': 'mp3', + 'duration': 179, + 'track_number': 3, + 'timestamp': 1701417610, + 'upload_date': '20231201', + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'album_artist': 'Post Present Medium ', + 'album': 'The Spatulas - March Chant', + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'uploader': 'ppmrecs', + 'channel': 'ppm', + } + }, { + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4', + 'title': 'Psychic Signal', + 'track': 'Psychic Signal', + 'ext': 'mp3', + 'duration': 220, + 'track_number': 4, + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'upload_date': '20231201', + 'album': 'The Spatulas - March Chant', + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'timestamp': 1701417610, + 'album_artist': 'Post Present Medium ', + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'channel': 'ppm', + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + 'uploader': 'ppmrecs', + } + }, { + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5', + 'title': 'Curvy Color', + 'track': 'Curvy Color', + 'ext': 'mp3', + 'duration': 148, + 'track_number': 5, + 'timestamp': 1701417610, + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'album': 'The Spatulas - March Chant', + 'album_artist': 'Post Present Medium ', + 'channel': 'ppm', + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'uploader': 'ppmrecs', + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'upload_date': '20231201', + } + }, { + 'info_dict': { + 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6', + 'title': 'Caveman Star', + 'track': 'Caveman Star', + 'ext': 'mp3', + 'duration': 121, + 'track_number': 6, + 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', + 'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50', + 'tags': ['punk', 'postpresentmedium', 'cambridge'], + 'album_artist': 'Post Present Medium ', + 'uploader': 'ppmrecs', + 'timestamp': 1701417610, + 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', + 'album': 'The Spatulas - March Chant', + 'channel': 'ppm', + 'upload_date': '20231201', + }, + }], + }, { + 'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield', + 'info_dict': { + 'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J', + 'description': 'md5:63f08d5db558b4b36e1896f317062721', + 'title': 'F.G.S. - American Shield', + 'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci', + 'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE', + 'channel': 'tinkscough', + 'tags': [], + 'album_artist': 'F.G.S.', + 'album': 'F.G.S. - American Shield', + 'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8', + 'display_id': 'f-g-s-american-shield', + 'uploader': 'flannerysilva', + 'timestamp': 1702395858, + 'upload_date': '20231212', + }, + 'playlist_count': 1, + }, { + 'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out', + 'info_dict': { + 'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh', + 'display_id': 'time-to-figure-things-out', + 'description': 'md5:960202ed01c3134bb8958f1008527e35', + 'timestamp': 1706283607, + 'title': 'DJ STEPDAD - time to figure things out', + 'album_artist': 'DJ STEPDAD', + 'uploader': 'tddvsss', + 'upload_date': '20240126', + 'album': 'time to figure things out', + 'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi', + 'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss', + 'tags': [], + }, + 'playlist_count': 4, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + release = self._download_json( + f'https://api.ninaprotocol.com/v1/releases/{video_id}', video_id)['release'] + + video_id = release.get('publicKey') or video_id + + common_info = traverse_obj(release, { + 'album': ('metadata', 'properties', 'title', {str}), + 'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}), + 'timestamp': ('datetime', {parse_iso8601}), + 'thumbnail': ('metadata', 'image', {url_or_none}), + 'uploader': ('publisherAccount', 'handle', {str}), + 'uploader_id': ('publisherAccount', 'publicKey', {str}), + 'channel': ('hub', 'handle', {str}), + 'channel_id': ('hub', 'publicKey', {str}), + }, get_all=False) + common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str})) + + entries = [] + for track_num, track in enumerate(traverse_obj(release, ( + 'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri']))), 1): + entries.append({ + 'id': f'{video_id}_{track_num}', + 'url': track['uri'], + **traverse_obj(track, { + 'title': ('track_title', {str}), + 'track': ('track_title', {str}), + 'ext': ('type', {mimetype2ext}), + 'track_number': ('track', {int_or_none}), + 'duration': ('duration', {int_or_none}), + }), + 'vcodec': 'none', + **common_info, + }) + + return { + '_type': 'playlist', + 'id': video_id, + 'entries': entries, + **traverse_obj(release, { + 'display_id': ('slug', {str}), + 'title': ('metadata', 'name', {str}), + 'description': ('metadata', 'description', {str}), + }), + **common_info, + } diff --git a/yt_dlp/extractor/nuum.py b/yt_dlp/extractor/nuum.py new file mode 100644 index 0000000000..3db663ded0 --- /dev/null +++ b/yt_dlp/extractor/nuum.py @@ -0,0 +1,199 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + OnDemandPagedList, + UserNotLive, + filter_dict, + int_or_none, + parse_iso8601, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class NuumBaseIE(InfoExtractor): + def _call_api(self, path, video_id, description, query={}): + response = self._download_json( + f'https://nuum.ru/api/v2/{path}', video_id, query=query, + note=f'Downloading {description} metadata', + errnote=f'Unable to download {description} metadata') + if error := response.get('error'): + raise ExtractorError(f'API returned error: {error!r}') + return response['result'] + + def _get_channel_info(self, channel_name): + return self._call_api( + 'broadcasts/public', video_id=channel_name, description='channel', + query={ + 'with_extra': 'true', + 'channel_name': channel_name, + 'with_deleted': 'true', + }) + + def _parse_video_data(self, container, extract_formats=True): + stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {} + media = traverse_obj(stream, ('stream_media', 0, {dict})) or {} + media_url = traverse_obj(media, ( + 'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False) + + video_id = str(container['media_container_id']) + is_live = media.get('media_status') == 'RUNNING' + + formats, subtitles = None, None + if extract_formats: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + media_url, video_id, 'mp4', live=is_live) + + return filter_dict({ + 'id': video_id, + 'is_live': is_live, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(container, { + 'title': ('media_container_name', {str}), + 'description': ('media_container_description', {str}), + 'timestamp': ('created_at', {parse_iso8601}), + 'channel': ('media_container_channel', 'channel_name', {str}), + 'channel_id': ('media_container_channel', 'channel_id', {str_or_none}), + }), + **traverse_obj(stream, { + 'view_count': ('stream_total_viewers', {int_or_none}), + 'concurrent_view_count': ('stream_current_viewers', {int_or_none}), + }), + **traverse_obj(media, { + 'duration': ('media_duration', {int_or_none}), + 'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}), + }, get_all=False), + }) + + +class NuumMediaIE(NuumBaseIE): + IE_NAME = 'nuum:media' + _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P[\d]+)' + _TESTS = [{ + 'url': 'https://nuum.ru/streams/1592713-7-days-to-die', + 'only_matching': True, + }, { + 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz', + 'md5': 'f1d9118a30403e32b702a204eb03aca3', + 'info_dict': { + 'id': '1567547', + 'ext': 'mp4', + 'title': 'Toxi$ - Hurtz', + 'description': '', + 'timestamp': 1702631651, + 'upload_date': '20231215', + 'thumbnail': r're:^https?://.+\.jpg', + 'view_count': int, + 'concurrent_view_count': int, + 'channel_id': '6911', + 'channel': 'toxis', + 'duration': 116, + }, + }, { + 'url': 'https://nuum.ru/clips/1552564-pro-misu', + 'md5': 'b248ae1565b1e55433188f11beeb0ca1', + 'info_dict': { + 'id': '1552564', + 'ext': 'mp4', + 'title': 'Про Мису 🙃', + 'timestamp': 1701971828, + 'upload_date': '20231207', + 'thumbnail': r're:^https?://.+\.jpg', + 'view_count': int, + 'concurrent_view_count': int, + 'channel_id': '3320', + 'channel': 'Misalelik', + 'duration': 41, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._call_api(f'media-containers/{video_id}', video_id, 'media') + + return self._parse_video_data(video_data) + + +class NuumLiveIE(NuumBaseIE): + IE_NAME = 'nuum:live' + _VALID_URL = r'https?://nuum\.ru/channel/(?P[^/#?]+)/?(?:$|[#?])' + _TESTS = [{ + 'url': 'https://nuum.ru/channel/mts_live', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel = self._match_id(url) + channel_info = self._get_channel_info(channel) + if traverse_obj(channel_info, ('channel', 'channel_is_live')) is False: + raise UserNotLive(video_id=channel) + + info = self._parse_video_data(channel_info['media_container']) + return { + 'webpage_url': f'https://nuum.ru/streams/{info["id"]}', + 'extractor_key': NuumMediaIE.ie_key(), + 'extractor': NuumMediaIE.IE_NAME, + **info, + } + + +class NuumTabIE(NuumBaseIE): + IE_NAME = 'nuum:tab' + _VALID_URL = r'https?://nuum\.ru/channel/(?P[^/#?]+)/(?Pstreams|videos|clips)' + _TESTS = [{ + 'url': 'https://nuum.ru/channel/dankon_/clips', + 'info_dict': { + 'id': 'dankon__clips', + 'title': 'Dankon_', + }, + 'playlist_mincount': 29, + }, { + 'url': 'https://nuum.ru/channel/dankon_/videos', + 'info_dict': { + 'id': 'dankon__videos', + 'title': 'Dankon_', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://nuum.ru/channel/dankon_/streams', + 'info_dict': { + 'id': 'dankon__streams', + 'title': 'Dankon_', + }, + 'playlist_mincount': 1, + }] + + _PAGE_SIZE = 50 + + def _fetch_page(self, channel_id, tab_type, tab_id, page): + CONTAINER_TYPES = { + 'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'], + 'videos': ['LONG_VIDEO'], + 'streams': ['SINGLE'], + } + + media_containers = self._call_api( + 'media-containers', video_id=tab_id, description=f'{tab_type} tab page {page + 1}', + query={ + 'limit': self._PAGE_SIZE, + 'offset': page * self._PAGE_SIZE, + 'channel_id': channel_id, + 'media_container_status': 'STOPPED', + 'media_container_type': CONTAINER_TYPES[tab_type], + }) + for container in traverse_obj(media_containers, (..., {dict})): + metadata = self._parse_video_data(container, extract_formats=False) + yield self.url_result(f'https://nuum.ru/videos/{metadata["id"]}', NuumMediaIE, **metadata) + + def _real_extract(self, url): + channel_name, tab_type = self._match_valid_url(url).group('id', 'type') + tab_id = f'{channel_name}_{tab_type}' + channel_data = self._get_channel_info(channel_name)['channel'] + + return self.playlist_result(OnDemandPagedList(functools.partial( + self._fetch_page, channel_data['channel_id'], tab_type, tab_id), self._PAGE_SIZE), + playlist_id=tab_id, playlist_title=channel_data.get('channel_name')) diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 2e21edbb41..3019202a2e 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -1,50 +1,93 @@ -import hmac -import hashlib -import base64 +import json +import uuid from .common import InfoExtractor from ..utils import ( + ExtractorError, + clean_html, determine_ext, + extract_attributes, float_or_none, + get_elements_html_by_class, int_or_none, - js_to_json, + merge_dicts, mimetype2ext, parse_iso8601, + remove_end, remove_start, + str_or_none, + traverse_obj, + url_or_none, ) class NYTimesBaseIE(InfoExtractor): - _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v' + _DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d') + _TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB' + _GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2' + _GRAPHQL_QUERY = '''query VideoQuery($id: String!) { + video(id: $id) { + ... on Video { + bylines { + renderedRepresentation + } + duration + firstPublished + promotionalHeadline + promotionalMedia { + ... on Image { + crops { + name + renditions { + name + width + height + url + } + } + } + } + renditions { + type + width + height + url + bitrate + } + summary + } + } +}''' - def _extract_video_from_id(self, video_id): - # Authorization generation algorithm is reverse engineered from `signer` in - # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js - path = '/svc/video/api/v3/video/' + video_id - hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest() - video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={ - 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(), - 'X-NYTV': 'vhs', - }, fatal=False) - if not video_data: - video_data = self._download_json( - 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id, - video_id, 'Downloading video JSON') + def _call_api(self, media_id): + # reference: `id-to-uri.js` + video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video') + media_uuid = uuid.uuid5(video_uuid, media_id) - title = video_data['headline'] + return traverse_obj(self._download_json( + self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({ + 'query': self._GRAPHQL_QUERY, + 'variables': {'id': f'nyt://video/{media_uuid}'}, + }, separators=(',', ':')).encode(), headers={ + 'Content-Type': 'application/json', + 'Nyt-App-Type': 'vhs', + 'Nyt-App-Version': 'v3.52.21', + 'Nyt-Token': self._TOKEN, + 'Origin': 'https://nytimes.com', + }, fatal=False), ('data', 'video', {dict})) or {} - def get_file_size(file_size): - if isinstance(file_size, int): - return file_size - elif isinstance(file_size, dict): - return int(file_size.get('value', 0)) - else: - return None + def _extract_thumbnails(self, thumbs): + return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), { + 'url': 'url', + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), default=None) + def _extract_formats_and_subtitles(self, video_id, content_media_json): urls = [] formats = [] subtitles = {} - for video in video_data.get('renditions', []): + for video in traverse_obj(content_media_json, ('renditions', ..., {dict})): video_url = video.get('url') format_id = video.get('type') if not video_url or format_id == 'thumbs' or video_url in urls: @@ -56,11 +99,9 @@ def get_file_size(file_size): video_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id or 'hls', fatal=False) formats.extend(m3u8_fmts) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) + self._merge_subtitles(m3u8_subs, target=subtitles) elif ext == 'mpd': - continue - # formats.extend(self._extract_mpd_formats( - # video_url, video_id, format_id or 'dash', fatal=False)) + continue # all mpd urls give 404 errors else: formats.append({ 'url': video_url, @@ -68,55 +109,50 @@ def get_file_size(file_size): 'vcodec': video.get('videoencoding') or video.get('video_codec'), 'width': int_or_none(video.get('width')), 'height': int_or_none(video.get('height')), - 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')), + 'filesize': traverse_obj(video, ( + ('file_size', 'fileSize'), (None, ('value')), {int_or_none}), get_all=False), 'tbr': int_or_none(video.get('bitrate'), 1000) or None, 'ext': ext, }) - thumbnails = [] - for image in video_data.get('images', []): - image_url = image.get('url') - if not image_url: - continue - thumbnails.append({ - 'url': 'http://www.nytimes.com/' + image_url, - 'width': int_or_none(image.get('width')), - 'height': int_or_none(image.get('height')), - }) + return formats, subtitles - publication_date = video_data.get('publication_date') - timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None + def _extract_video(self, media_id): + data = self._call_api(media_id) + formats, subtitles = self._extract_formats_and_subtitles(media_id, data) return { - 'id': video_id, - 'title': title, - 'description': video_data.get('summary'), - 'timestamp': timestamp, - 'uploader': video_data.get('byline'), - 'duration': float_or_none(video_data.get('duration'), 1000), + 'id': media_id, + 'title': data.get('promotionalHeadline'), + 'description': data.get('summary'), + 'timestamp': parse_iso8601(data.get('firstPublished')), + 'duration': float_or_none(data.get('duration'), scale=1000), + 'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators' + 'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))), 'formats': formats, 'subtitles': subtitles, - 'thumbnails': thumbnails, + 'thumbnails': self._extract_thumbnails( + traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))), } class NYTimesIE(NYTimesBaseIE): _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P\d+)' _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>'] - _TESTS = [{ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', - 'md5': 'd665342765db043f7e225cff19df0f2d', + 'md5': 'a553aa344014e3723d33893d89d4defc', 'info_dict': { 'id': '100000002847155', - 'ext': 'mov', + 'ext': 'mp4', 'title': 'Verbatim: What Is a Photocopier?', 'description': 'md5:93603dada88ddbda9395632fdc5da260', - 'timestamp': 1398631707, - 'upload_date': '20140427', - 'uploader': 'Brett Weiner', + 'timestamp': 1398646132, + 'upload_date': '20140428', + 'creator': 'Brett Weiner', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.+\.jpg', 'duration': 419, - } + }, }, { 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html', 'only_matching': True, @@ -125,138 +161,260 @@ class NYTimesIE(NYTimesBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - return self._extract_video_from_id(video_id) + return self._extract_video(video_id) class NYTimesArticleIE(NYTimesBaseIE): - _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?[^.]+)(?:\.html)?' + _VALID_URL = r'https?://(?:www\.)?nytimes\.com/\d{4}/\d{2}/\d{2}/(?!books|podcasts)[^/?#]+/(?:\w+/)?(?P[^./?#]+)(?:\.html)?' _TESTS = [{ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0', - 'md5': 'e2076d58b4da18e6a001d53fd56db3c9', + 'md5': '3eb5ddb1d6f86254fe4f233826778737', 'info_dict': { 'id': '100000003628438', - 'ext': 'mov', - 'title': 'New Minimum Wage: $70,000 a Year', - 'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.', - 'timestamp': 1429033037, + 'ext': 'mp4', + 'title': 'One Company’s New Minimum Wage: $70,000 a Year', + 'description': 'md5:89ba9ab67ca767bb92bf823d1f138433', + 'timestamp': 1429047468, 'upload_date': '20150414', 'uploader': 'Matthew Williams', - } + 'creator': 'Patricia Cohen', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', + 'duration': 119.0, + }, }, { - 'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html', - 'md5': 'e0d52040cafb07662acf3c9132db3575', + # article with audio and no video + 'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html', + 'md5': '2365b3555c8aa7f4dd34ca735ad02e6a', 'info_dict': { - 'id': '100000004709062', - 'title': 'The Run-Up: ‘He Was Like an Octopus’', + 'id': '100000009110381', 'ext': 'mp3', - 'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4', - 'series': 'The Run-Up', - 'episode': '‘He Was Like an Octopus’', - 'episode_number': 20, - 'duration': 2130, - } + 'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?', + 'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e', + 'timestamp': 1695960700, + 'upload_date': '20230929', + 'creator': 'Stephanie Nolen, Natalija Gormalova', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', + 'duration': 1322, + }, }, { - 'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html', + 'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html', + 'md5': '3eb5ddb1d6f86254fe4f233826778737', 'info_dict': { - 'id': '100000004709479', - 'title': 'The Rise of Hitler', - 'ext': 'mp3', - 'description': 'md5:bce877fd9e3444990cb141875fab0028', - 'creator': 'Pamela Paul', - 'duration': 3475, + 'id': '100000009202270', + 'ext': 'mp4', + 'title': 'Kamala Harris Defends Biden Policies, but Says ‘More Work’ Needed to Reach Voters', + 'description': 'md5:de4212a7e19bb89e4fb14210ca915f1f', + 'timestamp': 1701290997, + 'upload_date': '20231129', + 'uploader': 'By The New York Times', + 'creator': 'Katie Rogers', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', + 'duration': 97.631, }, 'params': { - 'skip_download': True, + 'skip_download': 'm3u8', }, }, { - 'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1', + # multiple videos in the same article + 'url': 'https://www.nytimes.com/2023/12/02/business/air-traffic-controllers-safety.html', + 'info_dict': { + 'id': 'air-traffic-controllers-safety', + 'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink', + 'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d', + 'upload_date': '20231202', + 'creator': 'Emily Steel, Sydney Ember', + 'timestamp': 1701511264, + }, + 'playlist_count': 3, + }, { + 'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html', 'only_matching': True, }] - def _extract_podcast_from_json(self, json, page_id, webpage): - podcast_audio = self._parse_json( - json, page_id, transform_source=js_to_json) + def _extract_content_from_block(self, block): + details = traverse_obj(block, { + 'id': ('sourceId', {str}), + 'uploader': ('bylines', ..., 'renderedRepresentation', {str}), + 'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))), + 'timestamp': ('firstPublished', {parse_iso8601}), + 'series': ('podcastSeries', {str}), + }, get_all=False) - audio_data = podcast_audio['data'] - track = audio_data['track'] - - episode_title = track['title'] - video_url = track['source'] - - description = track.get('description') or self._html_search_meta( - ['og:description', 'twitter:description'], webpage) - - podcast_title = audio_data.get('podcast', {}).get('title') - title = ('%s: %s' % (podcast_title, episode_title) - if podcast_title else episode_title) - - episode = audio_data.get('podcast', {}).get('episode') or '' - episode_number = int_or_none(self._search_regex( - r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None)) + formats, subtitles = self._extract_formats_and_subtitles(details.get('id'), block) + # audio articles will have an url and no formats + url = traverse_obj(block, ('fileUrl', {url_or_none})) + if not formats and url: + formats.append({'url': url, 'vcodec': 'none'}) return { - 'id': remove_start(podcast_audio.get('target'), 'FT') or page_id, - 'url': video_url, - 'title': title, - 'description': description, - 'creator': track.get('credit'), - 'series': podcast_title, - 'episode': episode_title, - 'episode_number': episode_number, - 'duration': int_or_none(track.get('duration')), + **details, + 'thumbnails': self._extract_thumbnails(traverse_obj( + block, ('promotionalMedia', 'crops', ..., 'renditions', ...))), + 'formats': formats, + 'subtitles': subtitles } def _real_extract(self, url): page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + art_json = self._search_json( + r'window\.__preloadedData\s*=', webpage, 'media details', page_id, + transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article'] - video_id = self._search_regex( - r'data-videoid=["\'](\d+)', webpage, 'video id', - default=None, fatal=False) - if video_id is not None: - return self._extract_video_from_id(video_id) + blocks = traverse_obj(art_json, ( + 'sprinkledBody', 'content', ..., ('ledeMedia', None), + lambda _, v: v['__typename'] in ('Video', 'Audio'))) + if not blocks: + raise ExtractorError('Unable to extract any media blocks from webpage') - podcast_data = self._search_regex( - (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s* 1: + return self.playlist_result(entries, page_id, **common_info) + + return { + 'id': page_id, + **entries[0], + } class NYTimesCookingIE(NYTimesBaseIE): - _VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P\d+)' + IE_NAME = 'NYTimesCookingGuide' + _VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P[\w-]+)' _TESTS = [{ - 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart', - 'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3', + 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey', 'info_dict': { - 'id': '100000004756089', - 'ext': 'mov', - 'timestamp': 1479383008, - 'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON', - 'title': 'Cranberry Tart', - 'upload_date': '20161117', - 'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.', + 'id': '13-how-to-cook-a-turkey', + 'title': 'How to Cook a Turkey', + 'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0', + }, + 'playlist_count': 2, + }, { + # single video example + 'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese', + 'md5': '64415805fe0b8640fce6b0b9def5989a', + 'info_dict': { + 'id': '100000005835845', + 'ext': 'mp4', + 'title': 'How to Make Mac and Cheese', + 'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1', + 'timestamp': 1522950315, + 'upload_date': '20180405', + 'duration': 9.51, + 'creator': 'Alison Roman', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', }, }, { - 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey', - 'md5': '4b2e8c70530a89b8d905a2b572316eb8', + 'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake', + 'md5': '64415805fe0b8640fce6b0b9def5989a', 'info_dict': { - 'id': '100000003951728', - 'ext': 'mov', - 'timestamp': 1445509539, - 'description': 'Turkey guide', - 'upload_date': '20151022', - 'title': 'Turkey', - } + 'id': '20-how-to-frost-a-cake', + 'title': 'How to Frost a Cake', + 'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd', + }, + 'playlist_count': 8, }] def _real_extract(self, url): page_id = self._match_id(url) - webpage = self._download_webpage(url, page_id) + title = self._html_search_meta(['og:title', 'twitter:title'], webpage) + description = self._html_search_meta(['og:description', 'twitter:description'], webpage) - video_id = self._search_regex( - r'data-video-id=["\'](\d+)', webpage, 'video id') + lead_video_id = self._search_regex( + r'data-video-player-id="(\d+)">', webpage, 'lead video') + media_ids = traverse_obj( + get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id')) - return self._extract_video_from_id(video_id) + if media_ids: + media_ids.append(lead_video_id) + return self.playlist_result( + [self._extract_video(media_id) for media_id in media_ids], page_id, title, description) + + return { + **self._extract_video(lead_video_id), + 'title': title, + 'description': description, + 'creator': self._search_regex( # TODO: change to 'creators' + r'

', webpage, 'author', default=None), + } + + +class NYTimesCookingRecipeIE(InfoExtractor): + _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P\d+)' + _TESTS = [{ + 'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart', + 'md5': '579e83bbe8e61e9de67f80edba8a78a8', + 'info_dict': { + 'id': '1017817', + 'ext': 'mp4', + 'title': 'Cranberry Curd Tart', + 'description': 'md5:ad77a3fc321db636256d4343c5742152', + 'timestamp': 1447804800, + 'upload_date': '20151118', + 'creator': 'David Tanis', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', + }, + }, { + 'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies', + 'md5': '58df35998241dcf0620e99e646331b42', + 'info_dict': { + 'id': '1024781', + 'ext': 'mp4', + 'title': 'Neapolitan Checkerboard Cookies', + 'description': 'md5:ba12394c585ababea951cb6d2fcc6631', + 'timestamp': 1701302400, + 'upload_date': '20231130', + 'creator': 'Sue Li', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', + }, + }, { + 'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats', + 'md5': '2fe7965a3adc899913b8e25ada360823', + 'info_dict': { + 'id': '1019516', + 'ext': 'mp4', + 'timestamp': 1546387200, + 'description': 'md5:8856ce10239161bd2596ac335b9f9bfb', + 'upload_date': '20190102', + 'title': 'Overnight Oats', + 'creator': 'Genevieve Ko', + 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg', + }, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + recipe_data = self._search_nextjs_data(webpage, page_id)['props']['pageProps']['recipe'] + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + recipe_data['videoSrc'], page_id, 'mp4', m3u8_id='hls') + + return { + **traverse_obj(recipe_data, { + 'id': ('id', {str_or_none}), + 'title': ('title', {str}), + 'description': ('topnote', {clean_html}), + 'timestamp': ('publishedAt', {int_or_none}), + 'creator': ('contentAttribution', 'cardByline', {str}), + }), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': [{'url': thumb_url} for thumb_url in traverse_obj( + recipe_data, ('image', 'crops', 'recipe', ..., {url_or_none}))], + } diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 9a48ae1b3e..1b2a79a625 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -1,3 +1,4 @@ +import base64 import functools import re @@ -565,3 +566,66 @@ def _real_extract(self, url): }) return self.playlist_result(entries) + + +class ORFONIE(InfoExtractor): + IE_NAME = 'orf:on' + _VALID_URL = r'https?://on\.orf\.at/video/(?P\d{8})/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', + 'info_dict': { + 'id': '14210000', + 'ext': 'mp4', + 'duration': 2651.08, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg', + 'title': 'School of Champions (4/8)', + 'description': 'md5:d09ad279fc2e8502611e7648484b6afd', + 'media_type': 'episode', + 'timestamp': 1706472362, + 'upload_date': '20240128', + } + }] + + def _extract_video(self, video_id, display_id): + encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() + api_json = self._download_json( + f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id) + + formats, subtitles = [], {} + for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): + for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): + if manifest_type == 'hls': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + manifest_url, display_id, fatal=False, m3u8_id='hls') + elif manifest_type == 'dash': + fmts, subs = self._extract_mpd_formats_and_subtitles( + manifest_url, display_id, fatal=False, mpd_id='dash') + else: + continue + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(api_json, { + 'duration': ('duration_second', {float_or_none}), + 'title': (('title', 'headline'), {str}), + 'description': (('description', 'teaser_text'), {str}), + 'media_type': ('video_type', {str}), + }, get_all=False), + } + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'slug') + webpage = self._download_webpage(url, display_id) + + return { + 'id': video_id, + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + 'description': self._html_search_meta( + ['description', 'og:description', 'twitter:description'], webpage, default=None), + **self._search_json_ld(webpage, display_id, fatal=False), + **self._extract_video(video_id, display_id), + } diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 68e15737b9..730b2393e0 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -19,636 +19,902 @@ class PeerTubeIE(InfoExtractor): _INSTANCES_RE = r'''(?: # Taken from https://instances.joinpeertube.org/instances - 40two\.tube| - a\.metube\.ch| - advtv\.ml| - algorithmic\.tv| - alimulama\.com| - arcana\.fun| - archive\.vidicon\.org| - artefac-paris\.tv| - auf1\.eu| + 0ch\.tv| + 3dctube\.3dcandy\.social| + all\.electric\.kitchen| + alterscope\.fr| + anarchy\.tube| + apathy\.tv| + apertatube\.net| + archive\.nocopyrightintended\.tv| + archive\.reclaim\.tv| + area51\.media| + astrotube-ufe\.obspm\.fr| + astrotube\.obspm\.fr| + audio\.freediverse\.com| + azxtube\.youssefc\.tn| + bark\.video| battlepenguin\.video| - beertube\.epgn\.ch| - befree\.nohost\.me| + bava\.tv| + bee-tube\.fr| + beetoons\.tv| + biblion\.refchat\.net| + biblioteca\.theowlclub\.net| bideoak\.argia\.eus| - birkeundnymphe\.de| + bideoteka\.eus| + birdtu\.be| bitcointv\.com| - cattube\.org| - clap\.nerv-project\.eu| - climatejustice\.video| + bonn\.video| + breeze\.tube| + brioco\.live| + brocosoup\.fr| + canal\.facil\.services| + canard\.tube| + cdn01\.tilvids\.com| + celluloid-media\.huma-num\.fr| + chicago1\.peertube\.support| + cliptube\.org| + cloudtube\.ise\.fraunhofer\.de| comf\.tube| + comics\.peertube\.biz| + commons\.tube| + communitymedia\.video| conspiracydistillery\.com| + crank\.recoil\.org| + dalek\.zone| + dalliance\.network| + dangly\.parts| darkvapor\.nohost\.me| daschauher\.aksel\.rocks| digitalcourage\.video| - dreiecksnebel\.alex-detsch\.de| - eduvid\.org| + displayeurope\.video| + ds106\.tv| + dud-video\.inf\.tu-dresden\.de| + dud175\.inf\.tu-dresden\.de| + dytube\.com| + ebildungslabor\.video| evangelisch\.video| - exo\.tube| fair\.tube| + fedi\.video| + fedimovie\.com| fediverse\.tv| film\.k-prod\.fr| - flim\.txmn\.tk| + flipboard\.video| + foss\.video| + fossfarmers\.company| fotogramas\.politicaconciencia\.org| - ftsi\.ru| - gary\.vger\.cloud| - graeber\.video| + freediverse\.com| + freesoto-u2151\.vm\.elestio\.app| + freesoto\.tv| + garr\.tv| greatview\.video| grypstube\.uni-greifswald\.de| - highvoltage\.tv| - hpstube\.fr| - htp\.live| - hyperreal\.tube| + habratube\.site| + ilbjach\.ru| + infothema\.net| + itvplus\.iiens\.net| + johnydeep\.net| juggling\.digital| + jupiter\.tube| + kadras\.live| kino\.kompot\.si| kino\.schuerz\.at| kinowolnosc\.pl| kirche\.peertube-host\.de| + kiwi\.froggirl\.club| kodcast\.com| kolektiva\.media| - kraut\.zone| + kpop\.22x22\.ru| kumi\.tube| + la2\.peertube\.support| + la3\.peertube\.support| + la4\.peertube\.support| lastbreach\.tv| - lepetitmayennais\.fr\.nf| - lexx\.impa\.me| - libertynode\.tv| - libra\.syntazia\.org| - libremedia\.video| + lawsplaining\.peertube\.biz| + leopard\.tube| + live\.codinglab\.ch| live\.libratoi\.org| - live\.nanao\.moe| - live\.toobnix\.org| - livegram\.net| - lolitube\.freedomchan\.moe| + live\.oldskool\.fi| + live\.solari\.com| lucarne\.balsamine\.be| - maindreieck-tv\.de| - mani\.tube| - manicphase\.me| + luxtube\.lu| + makertube\.net| + media\.econoalchemist\.com| + media\.exo\.cat| media\.fsfe\.org| media\.gzevd\.de| - media\.inno3\.cricket| - media\.kaitaia\.life| + media\.interior\.edu\.uy| media\.krashboyz\.org| - media\.over-world\.org| - media\.skewed\.de| + media\.mzhd\.de| + media\.smz-ma\.de| + media\.theplattform\.net| media\.undeadnetwork\.de| + medias\.debrouillonet\.org| medias\.pingbase\.net| + mediatube\.fermalo\.fr| melsungen\.peertube-host\.de| - mirametube\.fr| - mojotube\.net| - monplaisirtube\.ddns\.net| + merci-la-police\.fr| + mindlyvideos\.com| + mirror\.peertube\.metalbanana\.net| + mirrored\.rocks| + mix\.video| mountaintown\.video| - my\.bunny\.cafe| - myfreetube\.de| + movies\.metricsmaster\.eu| + mtube\.mooo\.com| mytube\.kn-cloud\.de| + mytube\.le5emeaxe\.fr| mytube\.madzel\.de| - myworkoutarenapeertube\.cf| + nadajemy\.com| nanawel-peertube\.dyndns\.org| - nastub\.cz| - offenes\.tv| - orgdup\.media| - ovaltube\.codinglab\.ch| + neat\.tube| + nethack\.tv| + nicecrew\.tv| + nightshift\.minnix\.dev| + nolog\.media| + nyltube\.nylarea\.com| + ocfedtest\.hosted\.spacebear\.ee| + openmedia\.edunova\.it| p2ptv\.ru| p\.eertu\.be| p\.lu| + pastafriday\.club| + patriottube\.sonsofliberty\.red| + pcbu\.nl| peer\.azurs\.fr| - peertube1\.zeteo\.me| + peer\.d0g4\.me| + peer\.lukeog\.com| + peer\.madiator\.cloud| + peer\.raise-uav\.com| + peershare\.togart\.de| + peertube-blablalinux\.be| + peertube-demo\.learning-hub\.fr| + peertube-docker\.cpy\.re| + peertube-eu\.howlround\.com| + peertube-u5014\.vm\.elestio\.app| + peertube-us\.howlround\.com| peertube\.020\.pl| peertube\.0x5e\.eu| + peertube\.1984\.cz| + peertube\.2i2l\.net| + peertube\.adjutor\.xyz| + peertube\.adresse\.data\.gouv\.fr| peertube\.alpharius\.io| peertube\.am-networks\.fr| peertube\.anduin\.net| - peertube\.anzui\.dev| - peertube\.arbleizez\.bzh| + peertube\.anti-logic\.com| + peertube\.arch-linux\.cz| peertube\.art3mis\.de| - peertube\.atilla\.org| + peertube\.artsrn\.ualberta\.ca| + peertube\.askan\.info| + peertube\.astral0pitek\.synology\.me| peertube\.atsuchan\.page| - peertube\.aukfood\.net| - peertube\.aventer\.biz| + peertube\.automat\.click| peertube\.b38\.rural-it\.org| - peertube\.beeldengeluid\.nl| peertube\.be| + peertube\.beeldengeluid\.nl| peertube\.bgzashtita\.es| - peertube\.bitsandlinux\.com| + peertube\.bike| + peertube\.bildung-ekhn\.de| peertube\.biz| - peertube\.boba\.best| peertube\.br0\.fr| peertube\.bridaahost\.ynh\.fr| peertube\.bubbletea\.dev| peertube\.bubuit\.net| peertube\.cabaal\.net| - peertube\.cats-home\.net| - peertube\.chemnitz\.freifunk\.net| - peertube\.chevro\.fr| - peertube\.chrisspiegl\.com| + peertube\.chatinbit\.com| + peertube\.chaunchy\.com| + peertube\.chir\.rs| + peertube\.christianpacaud\.com| peertube\.chtisurel\.net| + peertube\.chuggybumba\.com| peertube\.cipherbliss\.com| + peertube\.cirkau\.art| + peertube\.cloud\.nerdraum\.de| peertube\.cloud\.sans\.pub| + peertube\.coko\.foundation| + peertube\.communecter\.org| + peertube\.concordia\.social| + peertube\.corrigan\.xyz| peertube\.cpge-brizeux\.fr| peertube\.ctseuro\.com| peertube\.cuatrolibertades\.org| - peertube\.cybercirujas\.club| - peertube\.cythin\.com| + peertube\.cube4fun\.net| + peertube\.dair-institute\.org| peertube\.davigge\.com| peertube\.dc\.pini\.fr| + peertube\.deadtom\.me| peertube\.debian\.social| + peertube\.delta0189\.xyz| peertube\.demonix\.fr| peertube\.designersethiques\.org| peertube\.desmu\.fr| - peertube\.devloprog\.org| peertube\.devol\.it| - peertube\.dtmf\.ca| - peertube\.ecologie\.bzh| + peertube\.dk| + peertube\.doesstuff\.social| + peertube\.eb8\.org| + peertube\.education-forum\.com| + peertube\.elforcer\.ru| + peertube\.em\.id\.lv| + peertube\.ethibox\.fr| peertube\.eu\.org| peertube\.european-pirates\.eu| + peertube\.eus| peertube\.euskarabildua\.eus| + peertube\.expi\.studio| + peertube\.familie-berner\.de| + peertube\.familleboisteau\.fr| + peertube\.fedihost\.website| peertube\.fenarinarsa\.com| - peertube\.fomin\.site| - peertube\.forsud\.be| - peertube\.francoispelletier\.org| - peertube\.freenet\.ru| - peertube\.freetalklive\.com| + peertube\.festnoz\.de| + peertube\.forteza\.fr| + peertube\.freestorm\.online| peertube\.functional\.cafe| - peertube\.gardeludwig\.fr| + peertube\.gaminglinux\.fr| peertube\.gargantia\.fr| - peertube\.gcfamily\.fr| + peertube\.geekgalaxy\.fr| + peertube\.gemlog\.ca| peertube\.genma\.fr| peertube\.get-racing\.de| + peertube\.ghis94\.ovh| peertube\.gidikroon\.eu| - peertube\.gruezishop\.ch| - peertube\.habets\.house| - peertube\.hackerfraternity\.org| + peertube\.giftedmc\.com| + peertube\.grosist\.fr| + peertube\.gruntwerk\.org| + peertube\.gsugambit\.com| + peertube\.hackerfoo\.com| + peertube\.hellsite\.net| + peertube\.helvetet\.eu| + peertube\.histoirescrepues\.fr| + peertube\.home\.x0r\.fr| + peertube\.hyperfreedom\.org| peertube\.ichigo\.everydayimshuflin\.com| - peertube\.ignifi\.me| + peertube\.ifwo\.eu| + peertube\.in\.ua| peertube\.inapurna\.org| peertube\.informaction\.info| peertube\.interhop\.org| - peertube\.iselfhost\.com| peertube\.it| + peertube\.it-arts\.net| peertube\.jensdiemer\.de| - peertube\.joffreyverd\.fr| + peertube\.johntheserg\.al| + peertube\.kaleidos\.net| peertube\.kalua\.im| - peertube\.kathryl\.fr| + peertube\.kcore\.org| peertube\.keazilla\.net| peertube\.klaewyss\.fr| - peertube\.kodcast\.com| + peertube\.kleph\.eu| + peertube\.kodein\.be| + peertube\.kooperatywa\.tech| + peertube\.kriom\.net| peertube\.kx\.studio| + peertube\.kyriog\.eu| + peertube\.la-famille-muller\.fr| + peertube\.labeuropereunion\.eu| peertube\.lagvoid\.com| - peertube\.lavallee\.tech| - peertube\.le5emeaxe\.fr| - peertube\.lestutosdeprocessus\.fr| - peertube\.librenet\.co\.za| + peertube\.lhc\.net\.br| + peertube\.libresolutions\.network| + peertube\.libretic\.fr| + peertube\.librosphere\.fr| peertube\.logilab\.fr| + peertube\.lon\.tv| peertube\.louisematic\.site| peertube\.luckow\.org| peertube\.luga\.at| peertube\.lyceeconnecte\.fr| - peertube\.manalejandro\.com| + peertube\.madixam\.xyz| + peertube\.magicstone\.dev| + peertube\.marienschule\.de| peertube\.marud\.fr| - peertube\.mattone\.net| peertube\.maxweiss\.io| + peertube\.miguelcr\.me| + peertube\.mikemestnik\.net| + peertube\.mobilsicher\.de| peertube\.monlycee\.net| peertube\.mxinfo\.fr| - peertube\.myrasp\.eu| - peertube\.nebelcloud\.de| + peertube\.naln1\.ca| peertube\.netzbegruenung\.de| - peertube\.newsocial\.tech| peertube\.nicolastissot\.fr| + peertube\.nogafam\.fr| + peertube\.normalgamingcommunity\.cz| peertube\.nz| peertube\.offerman\.com| + peertube\.ohioskates\.com| + peertube\.onionstorm\.net| peertube\.opencloud\.lu| - peertube\.orthus\.link| - peertube\.patapouf\.xyz| - peertube\.pi2\.dev| - peertube\.plataformess\.org| - peertube\.pl| - peertube\.portaesgnos\.org| + peertube\.otakufarms\.com| + peertube\.paladyn\.org| + peertube\.pix-n-chill\.fr| peertube\.r2\.enst\.fr| peertube\.r5c3\.fr| - peertube\.radres\.xyz| - peertube\.red| - peertube\.robonomics\.network| - peertube\.rtnkv\.cloud| - peertube\.runfox\.tk| + peertube\.redpill-insight\.com| + peertube\.researchinstitute\.at| + peertube\.revelin\.fr| + peertube\.rlp\.schule| + peertube\.rokugan\.fr| + peertube\.rougevertbleu\.tv| + peertube\.roundpond\.net| + peertube\.rural-it\.org| peertube\.satoshishop\.de| - peertube\.scic-tetris\.org| + peertube\.scyldings\.com| peertube\.securitymadein\.lu| + peertube\.semperpax\.com| peertube\.semweb\.pro| - peertube\.social\.my-wan\.de| - peertube\.soykaf\.org| - peertube\.stefofficiel\.me| + peertube\.sensin\.eu| + peertube\.sidh\.bzh| + peertube\.skorpil\.cz| + peertube\.smertrios\.com| + peertube\.sqweeb\.net| + peertube\.stattzeitung\.org| peertube\.stream| peertube\.su| peertube\.swrs\.net| peertube\.takeko\.cyou| - peertube\.tangentfox\.com| peertube\.taxinachtegel\.de| - peertube\.thenewoil\.xyz| + peertube\.teftera\.com| + peertube\.teutronic-services\.de| peertube\.ti-fr\.com| peertube\.tiennot\.net| - peertube\.troback\.com| + peertube\.tmp\.rcp\.tf| peertube\.tspu\.edu\.ru| - peertube\.tux\.ovh| peertube\.tv| peertube\.tweb\.tv| - peertube\.ucy\.de| peertube\.underworld\.fr| - peertube\.us\.to| - peertube\.ventresmous\.fr| + peertube\.vapronva\.pw| + peertube\.veen\.world| + peertube\.vesdia\.eu| + peertube\.virtual-assembly\.org| + peertube\.viviers-fibre\.net| peertube\.vlaki\.cz| - peertube\.w\.utnw\.de| - peertube\.westring\.digital| + peertube\.wiesbaden\.social| + peertube\.wivodaim\.net| + peertube\.wtf| + peertube\.wtfayla\.net| + peertube\.xrcb\.cat| peertube\.xwiki\.com| + peertube\.zd\.do| + peertube\.zetamc\.net| + peertube\.zmuuf\.org| peertube\.zoz-serv\.org| + peertube\.zwindler\.fr| peervideo\.ru| periscope\.numenaute\.org| - perron-tube\.de| + pete\.warpnine\.de| petitlutinartube\.fr| phijkchu\.com| - pierre\.tube| + phoenixproject\.group| piraten\.space| - play\.rosano\.ca| + pirtube\.calut\.fr| + pityu\.flaki\.hu| + play\.mittdata\.se| player\.ojamajo\.moe| - plextube\.nl| - pocketnetpeertube1\.nohost\.me| - pocketnetpeertube3\.nohost\.me| - pocketnetpeertube4\.nohost\.me| - pocketnetpeertube5\.nohost\.me| - pocketnetpeertube6\.nohost\.me| - pt\.24-7\.ro| - pt\.apathy\.top| + podlibre\.video| + portal\.digilab\.nfa\.cz| + private\.fedimovie\.com| + pt01\.lehrerfortbildung-bw\.de| pt\.diaspodon\.fr| - pt\.fedi\.tech| - pt\.maciej\.website| + pt\.freedomwolf\.cc| + pt\.gordons\.gen\.nz| + pt\.ilyamikcoder\.com| + pt\.irnok\.net| + pt\.mezzo\.moe| + pt\.na4\.eu| + pt\.netcraft\.ch| + pt\.rwx\.ch| + pt\.sfunk1x\.com| + pt\.thishorsie\.rocks| + pt\.vern\.cc| ptb\.lunarviews\.net| - ptmir1\.inter21\.net| - ptmir2\.inter21\.net| - ptmir3\.inter21\.net| - ptmir4\.inter21\.net| - ptmir5\.inter21\.net| - ptube\.horsentiers\.fr| - ptube\.xmanifesto\.club| - queermotion\.org| - re-wizja\.re-medium\.com| - regarder\.sans\.pub| - ruraletv\.ovh| - s1\.gegenstimme\.tv| - s2\.veezee\.tube| + ptube\.de| + ptube\.ranranhome\.info| + puffy\.tube| + puppet\.zone| + qtube\.qlyoung\.net| + quantube\.win| + rankett\.net| + replay\.jres\.org| + review\.peertube\.biz| sdmtube\.fr| - sender-fm\.veezee\.tube| - serv1\.wiki-tube\.de| + secure\.direct-live\.net| + secure\.scanovid\.com| + seka\.pona\.la| serv3\.wiki-tube\.de| - sickstream\.net| - sleepy\.tube| + skeptube\.fr| + social\.fedimovie\.com| + socpeertube\.ru| sovran\.video| + special\.videovortex\.tv| spectra\.video| + stl1988\.peertube-host\.de| + stream\.biovisata\.lt| + stream\.conesphere\.cloud| stream\.elven\.pw| + stream\.jurnalfm\.md| stream\.k-prod\.fr| - stream\.shahab\.nohost\.me| - streamsource\.video| + stream\.litera\.tools| + stream\.nuemedia\.se| + stream\.rlp-media\.de| + stream\.vrse\.be| studios\.racer159\.com| - testtube\.florimond\.eu| + styxhexenhammer666\.com| + syrteplay\.obspm\.fr| + t\.0x0\.st| + tbh\.co-shaoghal\.net| + test-fab\.ynh\.fr| + testube\.distrilab\.fr| tgi\.hosted\.spacebear\.ee| - thaitube\.in\.th| - the\.jokertv\.eu| theater\.ethernia\.net| thecool\.tube| + thevideoverse\.com| tilvids\.com| - toob\.bub\.org| - tpaw\.video| - truetube\.media| - tuba\.lhub\.pl| - tube-aix-marseille\.beta\.education\.fr| - tube-amiens\.beta\.education\.fr| - tube-besancon\.beta\.education\.fr| - tube-bordeaux\.beta\.education\.fr| - tube-clermont-ferrand\.beta\.education\.fr| - tube-corse\.beta\.education\.fr| - tube-creteil\.beta\.education\.fr| - tube-dijon\.beta\.education\.fr| - tube-education\.beta\.education\.fr| - tube-grenoble\.beta\.education\.fr| - tube-lille\.beta\.education\.fr| - tube-limoges\.beta\.education\.fr| - tube-montpellier\.beta\.education\.fr| - tube-nancy\.beta\.education\.fr| - tube-nantes\.beta\.education\.fr| - tube-nice\.beta\.education\.fr| - tube-normandie\.beta\.education\.fr| - tube-orleans-tours\.beta\.education\.fr| - tube-outremer\.beta\.education\.fr| - tube-paris\.beta\.education\.fr| - tube-poitiers\.beta\.education\.fr| - tube-reims\.beta\.education\.fr| - tube-rennes\.beta\.education\.fr| - tube-strasbourg\.beta\.education\.fr| - tube-toulouse\.beta\.education\.fr| - tube-versailles\.beta\.education\.fr| - tube1\.it\.tuwien\.ac\.at| + tinkerbetter\.tube| + tinsley\.video| + trailers\.ddigest\.com| + tube-action-educative\.apps\.education\.fr| + tube-arts-lettres-sciences-humaines\.apps\.education\.fr| + tube-cycle-2\.apps\.education\.fr| + tube-cycle-3\.apps\.education\.fr| + tube-education-physique-et-sportive\.apps\.education\.fr| + tube-enseignement-professionnel\.apps\.education\.fr| + tube-institutionnel\.apps\.education\.fr| + tube-langues-vivantes\.apps\.education\.fr| + tube-maternelle\.apps\.education\.fr| + tube-numerique-educatif\.apps\.education\.fr| + tube-sciences-technologies\.apps\.education\.fr| + tube-test\.apps\.education\.fr| + tube1\.perron-service\.de| + tube\.9minuti\.it| tube\.abolivier\.bzh| - tube\.ac-amiens\.fr| - tube\.aerztefueraufklaerung\.de| - tube\.alexx\.ml| + tube\.alado\.space| tube\.amic37\.fr| - tube\.anufrij\.de| - tube\.apolut\.net| - tube\.arkhalabs\.io| + tube\.area404\.cloud| tube\.arthack\.nz| - tube\.as211696\.net| - tube\.avensio\.de| + tube\.asulia\.fr| + tube\.awkward\.company| tube\.azbyka\.ru| tube\.azkware\.net| - tube\.bachaner\.fr| - tube\.bmesh\.org| - tube\.borked\.host| + tube\.bartrip\.me\.uk| + tube\.belowtoxic\.media| + tube\.bingle\.plus| + tube\.bit-friends\.de| tube\.bstly\.de| - tube\.chaoszone\.tv| - tube\.chatelet\.ovh| - tube\.cloud-libre\.eu| + tube\.chosto\.me| tube\.cms\.garden| - tube\.cowfee\.moe| - tube\.cryptography\.dog| - tube\.darknight-coffee\.org| - tube\.dev\.lhub\.pl| + tube\.communia\.org| + tube\.cyberia\.club| + tube\.cybershock\.life| + tube\.dembased\.xyz| + tube\.dev\.displ\.eu| + tube\.digitalesozialearbeit\.de| tube\.distrilab\.fr| + tube\.doortofreedom\.org| tube\.dsocialize\.net| + tube\.e-jeremy\.com| tube\.ebin\.club| + tube\.elemac\.fr| + tube\.erzbistum-hamburg\.de| + tube\.exozy\.me| tube\.fdn\.fr| - tube\.florimond\.eu| - tube\.foxarmy\.ml| - tube\.foxden\.party| - tube\.frischesicht\.de| + tube\.fedi\.quebec| + tube\.fediverse\.at| + tube\.felinn\.org| + tube\.flokinet\.is| + tube\.foad\.me\.uk| + tube\.freepeople\.fr| + tube\.friloux\.me| + tube\.froth\.zone| + tube\.fulda\.social| tube\.futuretic\.fr| - tube\.gnous\.eu| + tube\.g1zm0\.de| + tube\.g4rf\.net| + tube\.gaiac\.io| + tube\.geekyboo\.net| + tube\.genb\.de| + tube\.ghk-academy\.info| + tube\.gi-it\.de| tube\.grap\.coop| tube\.graz\.social| tube\.grin\.hu| - tube\.hackerscop\.org| - tube\.hordearii\.fr| + tube\.hokai\.lol| + tube\.int5\.net| + tube\.interhacker\.space| + tube\.invisible\.ch| + tube\.io18\.top| + tube\.itsg\.host| tube\.jeena\.net| - tube\.kai-stuht\.com| + tube\.kh-berlin\.de| tube\.kockatoo\.org| tube\.kotur\.org| + tube\.koweb\.fr| + tube\.la-dina\.net| + tube\.lab\.nrw| tube\.lacaveatonton\.ovh| + tube\.laurent-malys\.fr| + tube\.leetdreams\.ch| tube\.linkse\.media| tube\.lokad\.com| tube\.lucie-philou\.com| - tube\.melonbread\.xyz| - tube\.mfraters\.net| - tube\.motuhake\.xyz| - tube\.mrbesen\.de| - tube\.nah\.re| - tube\.nchoco\.net| + tube\.media-techport\.de| + tube\.morozoff\.pro| + tube\.neshweb\.net| + tube\.nestor\.coop| + tube\.network\.europa\.eu| + tube\.nicfab\.eu| + tube\.nieuwwestbrabant\.nl| + tube\.nogafa\.org| tube\.novg\.net| tube\.nox-rhea\.org| tube\.nuagelibre\.fr| + tube\.numerique\.gouv\.fr| + tube\.nuxnik\.com| tube\.nx12\.net| tube\.octaplex\.net| - tube\.odat\.xyz| tube\.oisux\.org| + tube\.okcinfo\.news| + tube\.onlinekirche\.net| tube\.opportunis\.me| + tube\.oraclefilms\.com| tube\.org\.il| - tube\.ortion\.xyz| - tube\.others\.social| + tube\.pacapime\.ovh| + tube\.parinux\.org| + tube\.pastwind\.top| tube\.picasoft\.net| - tube\.plomlompom\.com| + tube\.pilgerweg-21\.de| tube\.pmj\.rocks| + tube\.pol\.social| + tube\.ponsonaille\.fr| tube\.portes-imaginaire\.org| + tube\.public\.apolut\.net| + tube\.pustule\.org| tube\.pyngu\.com| + tube\.querdenken-711\.de| tube\.rebellion\.global| + tube\.reseau-canope\.fr| tube\.rhythms-of-resistance\.org| - tube\.rita\.moe| + tube\.risedsky\.ovh| + tube\.rooty\.fr| tube\.rsi\.cnr\.it| - tube\.s1gm4\.eu| - tube\.saumon\.io| + tube\.ryne\.moe| tube\.schleuss\.online| tube\.schule\.social| - tube\.seditio\.fr| + tube\.sekretaerbaer\.net| tube\.shanti\.cafe| tube\.shela\.nu| tube\.skrep\.in| + tube\.sleeping\.town| tube\.sp-codes\.de| - tube\.sp4ke\.com| - tube\.superseriousbusiness\.org| + tube\.spdns\.org| + tube\.systerserver\.net| tube\.systest\.eu| tube\.tappret\.fr| - tube\.tardis\.world| - tube\.toontoet\.nl| + tube\.techeasy\.org| + tube\.thierrytalbert\.fr| + tube\.tinfoil-hat\.net| + tube\.toldi\.eu| tube\.tpshd\.de| + tube\.trax\.im| tube\.troopers\.agency| + tube\.ttk\.is| + tube\.tuxfriend\.fr| tube\.tylerdavis\.xyz| + tube\.ullihome\.de| + tube\.ulne\.be| tube\.undernet\.uy| - tube\.vigilian-consulting\.nl| - tube\.vraphim\.com| - tube\.wehost\.lgbt| - tube\.wien\.rocks| + tube\.vrpnet\.org| tube\.wolfe\.casa| tube\.xd0\.de| + tube\.xn--baw-joa\.social| tube\.xy-space\.de| tube\.yapbreak\.fr| tubedu\.org| - tubes\.jodh\.us| - tuktube\.com| - turkum\.me| + tubulus\.openlatin\.org| + turtleisland\.video| tututu\.tube| - tuvideo\.encanarias\.info| - tv1\.cocu\.cc| - tv1\.gomntu\.space| - tv2\.cocu\.cc| + tv\.adast\.dk| tv\.adn\.life| + tv\.arns\.lt| tv\.atmx\.ca| - tv\.bitma\.st| - tv\.generallyrubbish\.net\.au| + tv\.based\.quest| + tv\.farewellutopia\.com| + tv\.filmfreedom\.net| + tv\.gravitons\.org| + tv\.io\.seg\.br| tv\.lumbung\.space| - tv\.mattchristiansenmedia\.com| - tv\.netwhood\.online| - tv\.neue\.city| - tv\.piejacker\.net| tv\.pirateradio\.social| + tv\.pirati\.cz| + tv\.santic-zombie\.ru| tv\.undersco\.re| + tv\.zonepl\.net| tvox\.ru| twctube\.twc-zone\.eu| - unfilter\.tube| + twobeek\.com| + urbanists\.video| + v\.9tail\.net| v\.basspistol\.org| + v\.j4\.lc| v\.kisombrella\.top| - v\.lastorder\.xyz| + v\.koa\.im| + v\.kyaru\.xyz| v\.lor\.sh| - v\.phreedom\.club| - v\.sil\.sh| - v\.szy\.io| - v\.xxxapex\.com| - veezee\.tube| - vid\.dascoyote\.xyz| - vid\.garwood\.io| - vid\.ncrypt\.at| - vid\.pravdastalina\.info| - vid\.qorg11\.net| - vid\.rajeshtaylor\.com| - vid\.samtripoli\.com| - vid\.werefox\.dev| + v\.mkp\.ca| + v\.posm\.gay| + v\.slaycer\.top| + veedeo\.org| + vhs\.absturztau\.be| + vid\.cthos\.dev| + vid\.kinuseka\.us| + vid\.mkp\.ca| + vid\.nocogabriel\.fr| + vid\.norbipeti\.eu| + vid\.northbound\.online| + vid\.ohboii\.de| + vid\.plantplotting\.co\.uk| + vid\.pretok\.tv| + vid\.prometheus\.systems| + vid\.soafen\.love| + vid\.twhtv\.club| vid\.wildeboer\.net| video-cave-v2\.de| + video-liberty\.com| video\.076\.ne\.jp| video\.1146\.nohost\.me| - video\.altertek\.org| + video\.9wd\.eu| + video\.abraum\.de| + video\.ados\.accoord\.fr| + video\.amiga-ng\.org| video\.anartist\.org| - video\.apps\.thedoodleproject\.net| - video\.artist\.cx| video\.asgardius\.company| - video\.balsillie\.net| + video\.audiovisuel-participatif\.org| video\.bards\.online| - video\.binarydad\.com| + video\.barkoczy\.social| + video\.benetou\.fr| + video\.beyondwatts\.social| + video\.bgeneric\.net| + video\.bilecik\.edu\.tr| video\.blast-info\.fr| + video\.bmu\.cloud| video\.catgirl\.biz| + video\.causa-arcana\.com| + video\.chasmcity\.net| + video\.chbmeyer\.de| video\.cigliola\.com| - video\.cm-en-transition\.fr| + video\.citizen4\.eu| + video\.clumsy\.computer| + video\.cnnumerique\.fr| + video\.cnr\.it| video\.cnt\.social| video\.coales\.co| - video\.codingfield\.com| - video\.comptoir\.net| video\.comune\.trento\.it| - video\.cpn\.so| + video\.coyp\.us| video\.csc49\.fr| - video\.cybre\.town| - video\.demokratischer-sommer\.de| - video\.discord-insoumis\.fr| - video\.dolphincastle\.com| + video\.davduf\.net| + video\.davejansen\.com| + video\.dlearning\.nl| + video\.dnfi\.no| video\.dresden\.network| - video\.ecole-89\.com| - video\.elgrillolibertario\.org| + video\.drgnz\.club| + video\.dudenas\.lt| + video\.eientei\.org| + video\.ellijaymakerspace\.org| video\.emergeheart\.info| video\.eradicatinglove\.xyz| - video\.ethantheenigma\.me| - video\.exodus-privacy\.eu\.org| - video\.fbxl\.net| + video\.everythingbagel\.me| + video\.extremelycorporate\.ca| + video\.fabiomanganiello\.com| + video\.fedi\.bzh| video\.fhtagn\.org| - video\.greenmycity\.eu| - video\.guerredeclasse\.fr| + video\.firehawk-systems\.com| + video\.fox-romka\.ru| + video\.fuss\.bz\.it| + video\.glassbeadcollective\.org| + video\.graine-pdl\.org| video\.gyt\.is| - video\.hackers\.town| + video\.hainry\.fr| video\.hardlimit\.com| - video\.hooli\.co| + video\.hostux\.net| video\.igem\.org| + video\.infojournal\.fr| video\.internet-czas-dzialac\.pl| + video\.interru\.io| + video\.ipng\.ch| + video\.ironsysadmin\.com| video\.islameye\.com| - video\.kicik\.fr| + video\.jacen\.moe| + video\.jadin\.me| + video\.jeffmcbride\.net| + video\.jigmedatse\.com| video\.kuba-orlik\.name| - video\.kyushojitsu\.ca| + video\.lacalligramme\.fr| + video\.lanceurs-alerte\.fr| + video\.laotra\.red| + video\.lapineige\.fr| + video\.laraffinerie\.re| video\.lavolte\.net| - video\.lespoesiesdheloise\.fr| video\.liberta\.vip| - video\.liege\.bike| + video\.libreti\.net| + video\.licentia\.net| video\.linc\.systems| video\.linux\.it| video\.linuxtrent\.it| - video\.lokal\.social| + video\.liveitlive\.show| video\.lono\.space| - video\.lunasqu\.ee| + video\.lrose\.de| + video\.lunago\.net| video\.lundi\.am| + video\.lycee-experimental\.org| + video\.maechler\.cloud| video\.marcorennmaus\.de| video\.mass-trespass\.uk| + video\.matomocamp\.org| + video\.medienzentrum-harburg\.de| + video\.mentality\.rip| + video\.metaversum\.wtf| + video\.midreality\.com| + video\.mttv\.it| video\.mugoreve\.fr| - video\.mundodesconocido\.com| + video\.mxtthxw\.art| video\.mycrowd\.ca| + video\.niboe\.info| video\.nogafam\.es| - video\.odayacres\.farm| + video\.nstr\.no| + video\.occm\.cc| + video\.off-investigation\.fr| + video\.olos311\.org| + video\.ordinobsolete\.fr| + video\.osvoj\.ru| + video\.ourcommon\.cloud| video\.ozgurkon\.org| - video\.p1ng0ut\.social| - video\.p3x\.de| video\.pcf\.fr| - video\.pony\.gallery| - video\.potate\.space| - video\.pourpenser\.pro| - video\.progressiv\.dev| + video\.pcgaldo\.com| + video\.phyrone\.de| + video\.poul\.org| + video\.publicspaces\.net| + video\.pullopen\.xyz| + video\.r3s\.nrw| + video\.rainevixen\.com| video\.resolutions\.it| - video\.rw501\.de| - video\.screamer\.wiki| - video\.sdm-tools\.net| + video\.retroedge\.tech| + video\.rhizome\.org| + video\.rlp-media\.de| + video\.rs-einrich\.de| + video\.rubdos\.be| + video\.sadmin\.io| video\.sftblw\.moe| video\.shitposter\.club| - video\.skyn3t\.in| + video\.simplex-software\.ru| + video\.slipfox\.xyz| + video\.snug\.moe| + video\.software-fuer-engagierte\.de| video\.soi\.ch| - video\.stuartbrand\.co\.uk| + video\.sonet\.ws| + video\.surazal\.net| + video\.taskcards\.eu| + video\.team-lcbs\.eu| + video\.techforgood\.social| + video\.telemillevaches\.net| + video\.thepolarbear\.co\.uk| video\.thinkof\.name| - video\.toot\.pt| + video\.tii\.space| + video\.tkz\.es| + video\.trankil\.info| video\.triplea\.fr| + video\.tum\.social| video\.turbo\.chat| + video\.uriopss-pdl\.fr| + video\.ustim\.ru| + video\.ut0pia\.org| video\.vaku\.org\.ua| + video\.vegafjord\.me| video\.veloma\.org| video\.violoncello\.ch| - video\.wilkie\.how| - video\.wsf2021\.info| - videorelay\.co| + video\.voidconspiracy\.band| + video\.wakkeren\.nl| + video\.windfluechter\.org| + video\.ziez\.eu| videos-passages\.huma-num\.fr| - videos\.3d-wolf\.com| + videos\.aadtp\.be| videos\.ahp-numerique\.fr| - videos\.alexandrebadalo\.pt| + videos\.alamaisondulibre\.org| videos\.archigny\.net| + videos\.aroaduntraveled\.com| + videos\.b4tech\.org| videos\.benjaminbrady\.ie| - videos\.buceoluegoexisto\.com| - videos\.capas\.se| - videos\.casually\.cat| + videos\.bik\.opencloud\.lu| videos\.cloudron\.io| + videos\.codingotaku\.com| videos\.coletivos\.org| + videos\.collate\.social| videos\.danksquad\.org| - videos\.denshi\.live| - videos\.fromouter\.space| + videos\.digitaldragons\.eu| + videos\.dromeadhere\.fr| + videos\.explain-it\.org| + videos\.factsonthegroundshow\.com| + videos\.foilen\.com| videos\.fsci\.in| + videos\.gamercast\.net| + videos\.gianmarco\.gg| videos\.globenet\.org| + videos\.grafo\.zone| videos\.hauspie\.fr| videos\.hush\.is| + videos\.hyphalfusion\.network| + videos\.icum\.to| + videos\.im\.allmendenetz\.de| + videos\.jacksonchen666\.com| videos\.john-livingston\.fr| - videos\.jordanwarne\.xyz| - videos\.lavoixdessansvoix\.org| + videos\.knazarov\.com| + videos\.kuoushi\.com| + videos\.laliguepaysdelaloire\.org| + videos\.lemouvementassociatif-pdl\.org| videos\.leslionsfloorball\.fr| - videos\.lucero\.top| - videos\.martyn\.berlin| + videos\.librescrum\.org| videos\.mastodont\.cat| - videos\.monstro1\.com| - videos\.npo\.city| - videos\.optoutpod\.com| - videos\.petch\.rocks| - videos\.pzelawski\.xyz| + videos\.metus\.ca| + videos\.miolo\.org| + videos\.offroad\.town| + videos\.openmandriva\.org| + videos\.parleur\.net| + videos\.pcorp\.us| + videos\.pop\.eu\.com| videos\.rampin\.org| + videos\.rauten\.co\.za| + videos\.ritimo\.org| + videos\.sarcasmstardust\.com| videos\.scanlines\.xyz| videos\.shmalls\.pw| - videos\.sibear\.fr| videos\.stadtfabrikanten\.org| - videos\.tankernn\.eu| + videos\.supertuxkart\.net| videos\.testimonia\.org| - videos\.thisishowidontdisappear\.com| - videos\.traumaheilung\.net| + videos\.thinkerview\.com| + videos\.torrenezzi10\.xyz| videos\.trom\.tf| - videos\.wakkerewereld\.nu| - videos\.weblib\.re| + videos\.utsukta\.org| + videos\.viorsan\.com| + videos\.wherelinux\.xyz| + videos\.wikilibriste\.fr| videos\.yesil\.club| + videos\.yeswiki\.net| + videotube\.duckdns\.org| + vids\.capypara\.de| vids\.roshless\.me| + vids\.stary\.pc\.pl| vids\.tekdmn\.me| - vidz\.dou\.bet| - vod\.lumikko\.dev| - vs\.uniter\.network| + vidz\.julien\.ovh| + views\.southfox\.me| + virtual-girls-are\.definitely-for\.me| + viste\.pt| + vnchich\.com| + vnop\.org| + vod\.newellijay\.tv| + voluntarytube\.com| + vtr\.chikichiki\.tube| vulgarisation-informatique\.fr| - watch\.breadtube\.tv| - watch\.deranalyst\.ch| + watch\.easya\.solutions| + watch\.goodluckgabe\.life| watch\.ignorance\.eu| - watch\.krazy\.party| + watch\.jimmydore\.com| watch\.libertaria\.space| - watch\.rt4mn\.org| - watch\.softinio\.com| + watch\.nuked\.social| + watch\.ocaml\.org| + watch\.thelema\.social| watch\.tubelab\.video| web-fellow\.de| webtv\.vandoeuvre\.net| - wechill\.space| + wetubevid\.online| wikileaks\.video| wiwi\.video| - worldofvids\.com| - wwtube\.net| - www4\.mir\.inter21\.net| - www\.birkeundnymphe\.de| - www\.captain-german\.com| - www\.wiki-tube\.de| + wow\.such\.disappointment\.fail| + www\.jvideos\.net| + www\.kotikoff\.net| + www\.makertube\.net| + www\.mypeer\.tube| + www\.nadajemy\.com| + www\.neptube\.io| + www\.rocaguinarda\.tv| + www\.vnshow\.net| xxivproduction\.video| - xxx\.noho\.st| + yt\.orokoro\.ru| + ytube\.retronerd\.at| + zumvideo\.de| # from youtube-dl peertube\.rainbowswingers\.net| @@ -1305,24 +1571,6 @@ class PeerTubePlaylistIE(InfoExtractor): (?P[^/]+) ''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys())) _TESTS = [{ - 'url': 'https://peertube.tux.ovh/w/p/3af94cba-95e8-4b74-b37a-807ab6d82526', - 'info_dict': { - 'id': '3af94cba-95e8-4b74-b37a-807ab6d82526', - 'description': 'playlist', - 'timestamp': 1611171863, - 'title': 'playlist', - }, - 'playlist_mincount': 6, - }, { - 'url': 'https://peertube.tux.ovh/w/p/wkyqcQBnsvFxtUB2pkYc1e', - 'info_dict': { - 'id': 'wkyqcQBnsvFxtUB2pkYc1e', - 'description': 'Cette liste de vidéos contient uniquement les jeux qui peuvent être terminés en une seule vidéo.', - 'title': 'Let\'s Play', - 'timestamp': 1604147331, - }, - 'playlist_mincount': 6, - }, { 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12', 'info_dict': { 'id': 'hFdJoTuyhNJVa1cDWd1d12', diff --git a/yt_dlp/extractor/prankcast.py b/yt_dlp/extractor/prankcast.py index b2ec5bbb86..562aca0ff1 100644 --- a/yt_dlp/extractor/prankcast.py +++ b/yt_dlp/extractor/prankcast.py @@ -1,5 +1,8 @@ +import json + from .common import InfoExtractor -from ..utils import parse_iso8601, traverse_obj, try_call +from ..utils import float_or_none, parse_iso8601, str_or_none, try_call +from ..utils.traversal import traverse_obj class PrankCastIE(InfoExtractor): @@ -64,3 +67,71 @@ def _real_extract(self, url): 'categories': [json_info.get('broadcast_category')], 'tags': try_call(lambda: json_info['broadcast_tags'].split(',')) } + + +class PrankCastPostIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P\d+)-(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-', + 'info_dict': { + 'id': '6214', + 'ext': 'mp3', + 'title': 'Happy National Rachel Day!', + 'display_id': 'happy-national-rachel-day-', + 'timestamp': 1704333938, + 'uploader': 'Devonanustart', + 'channel_id': '4', + 'duration': 13175, + 'cast': ['Devonanustart'], + 'description': '', + 'categories': ['prank call'], + 'upload_date': '20240104' + } + }, { + 'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-', + 'info_dict': { + 'id': '6217', + 'ext': 'mp3', + 'title': 'Jake the Work Crow!', + 'display_id': 'jake-the-work-crow-', + 'timestamp': 1704346592, + 'uploader': 'despicabledogs', + 'channel_id': '957', + 'duration': 263.287, + 'cast': ['despicabledogs'], + 'description': 'https://imgur.com/a/vtxLvKU', + 'categories': [], + 'upload_date': '20240104' + } + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + + webpage = self._download_webpage(url, video_id) + post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts'] + content = self._parse_json(post['post_contents_json'], video_id)[0] + + uploader = post.get('user_name') + guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {} + + return { + 'id': video_id, + 'title': post.get('post_title') or self._og_search_title(webpage), + 'display_id': display_id, + 'url': content.get('url'), + 'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '), + 'uploader': uploader, + 'channel_id': str_or_none(post.get('user_id')), + 'duration': float_or_none(content.get('duration')), + 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))), + 'description': post.get('post_body'), + 'categories': list(filter(None, [content.get('category')])), + 'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))), + 'subtitles': { + 'live_chat': [{ + 'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=', + 'ext': 'json', + }], + } if post.get('content_id') else None + } diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index c363d9ba5f..2b64059995 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -1,5 +1,6 @@ import base64 import random +import re import urllib.parse from .common import InfoExtractor @@ -11,6 +12,7 @@ unified_timestamp, update_url_query, ) +from ..utils.traversal import traverse_obj class RadikoBaseIE(InfoExtractor): @@ -159,6 +161,12 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, return formats + def _extract_performers(self, prog): + performers = traverse_obj(prog, ( + 'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip})) + # TODO: change 'artist' fields to 'artists' and return traversal list instead of str + return ', '.join(performers) or None + class RadikoIE(RadikoBaseIE): _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P[A-Z0-9-]+)/(?P\d+)' @@ -186,10 +194,12 @@ def _real_extract(self, url): return { 'id': video_id, 'title': try_call(lambda: prog.find('title').text), + 'artist': self._extract_performers(prog), 'description': clean_html(try_call(lambda: prog.find('info').text)), 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, + 'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)), 'is_live': True, 'formats': self._extract_formats( video_id=video_id, station=station, is_onair=False, @@ -243,6 +253,7 @@ def _real_extract(self, url): return { 'id': station, 'title': title, + 'artist': self._extract_performers(prog), 'description': description, 'uploader': station_name, 'uploader_id': station, diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 36d530dafc..965abbee8a 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -12,7 +12,7 @@ class RedTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)' + _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P[0-9]+)' _EMBED_REGEX = [r']+?src=["\'](?P(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] _TESTS = [{ 'url': 'https://www.redtube.com/38864951', @@ -35,6 +35,9 @@ class RedTubeIE(InfoExtractor): }, { 'url': 'http://it.redtube.com/66418', 'only_matching': True, + }, { + 'url': 'https://www.redtube.com.br/103224331', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 2aa0dd870a..a8d00e243a 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -21,7 +21,7 @@ class TVPIE(InfoExtractor): IE_NAME = 'tvp' IE_DESC = 'Telewizja Polska' - _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P\d+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P\d+)(?:[/?#]|$)' _TESTS = [{ # TVPlayer 2 in js wrapper @@ -514,7 +514,7 @@ def _parse_video(self, video, with_url=True): class TVPVODVideoIE(TVPVODBaseIE): IE_NAME = 'tvp:vod' - _VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+(?\d+)(?:\?[^#]+)?(?:#.+)?$' + _VALID_URL = r'https?://vod\.tvp\.pl/(?P[a-z\d-]+,\d+)/[a-z\d-]+(?\d+)/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357', @@ -560,12 +560,23 @@ class TVPVODVideoIE(TVPVODBaseIE): 'thumbnail': 're:https?://.+', }, 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://vod.tvp.pl/live,1/tvp-world,399731', + 'info_dict': { + 'id': '399731', + 'ext': 'mp4', + 'title': r're:TVP WORLD \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'live_status': 'is_live', + 'thumbnail': 're:https?://.+', + }, }] def _real_extract(self, url): - video_id = self._match_id(url) + category, video_id = self._match_valid_url(url).group('category', 'id') - info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False) + is_live = category == 'live,1' + entity = 'lives' if is_live else 'vods' + info_dict = self._parse_video(self._call_api(f'{entity}/{video_id}', video_id), with_url=False) playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'}) @@ -582,6 +593,8 @@ def _real_extract(self, url): 'ext': 'ttml', }) + info_dict['is_live'] = is_live + return info_dict diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index e5e8144bb1..208e111849 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -269,7 +269,7 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None): 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} if not jwt_response.get('jwt'): return - headers = {'Authorization': 'jwt %s' % jwt_response['jwt']} + headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'} original_response = self._download_json( f'https://api.vimeo.com/videos/{video_id}', video_id, headers=headers, fatal=False, expected_status=(403, 404)) or {} @@ -751,6 +751,7 @@ def _extract_from_api(self, video_id, unlisted_hash=None): video = self._download_json( api_url, video_id, headers={ 'Authorization': 'jwt ' + token, + 'Accept': 'application/json', }, query={ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', }) @@ -785,7 +786,7 @@ def _try_album_password(self, url): jwt = viewer['jwt'] album = self._download_json( 'https://api.vimeo.com/albums/' + album_id, - album_id, headers={'Authorization': 'jwt ' + jwt}, + album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'}, query={'fields': 'description,name,privacy'}) if try_get(album, lambda x: x['privacy']['view']) == 'password': password = self.get_param('videopassword') @@ -1147,10 +1148,12 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page): 'https://api.vimeo.com/albums/%s/videos' % album_id, album_id, 'Downloading page %d' % api_page, query=query, headers={ 'Authorization': 'jwt ' + authorization, + 'Accept': 'application/json', })['data'] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 400: return + raise for video in videos: link = video.get('link') if not link: @@ -1171,7 +1174,7 @@ def _real_extract(self, url): jwt = viewer['jwt'] album = self._download_json( 'https://api.vimeo.com/albums/' + album_id, - album_id, headers={'Authorization': 'jwt ' + jwt}, + album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'}, query={'fields': 'description,name,privacy'}) hashed_pass = None if try_get(album, lambda x: x['privacy']['view']) == 'password': diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py deleted file mode 100644 index f57c619b5f..0000000000 --- a/yt_dlp/extractor/wasdtv.py +++ /dev/null @@ -1,159 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - parse_iso8601, - traverse_obj, - try_get, -) - - -class WASDTVBaseIE(InfoExtractor): - - def _fetch(self, path, video_id, description, query={}): - response = self._download_json( - f'https://wasd.tv/api/{path}', video_id, query=query, - note=f'Downloading {description} metadata', - errnote=f'Unable to download {description} metadata') - error = response.get('error') - if error: - raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True) - return response.get('result') - - def _extract_thumbnails(self, thumbnails_dict): - return [{ - 'url': url, - 'preference': index, - } for index, url in enumerate( - traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url] - - def _real_extract(self, url): - container = self._get_container(url) - stream = traverse_obj(container, ('media_container_streams', 0)) - media = try_get(stream, lambda x: x['stream_media'][0]) - if not media: - raise ExtractorError('Can not extract media data.', expected=True) - media_meta = media.get('media_meta') - media_url, is_live = self._get_media_url(media_meta) - video_id = media.get('media_id') or container.get('media_container_id') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4') - return { - 'id': str(video_id), - 'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)), - 'description': container.get('media_container_description'), - 'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images')), - 'timestamp': parse_iso8601(container.get('created_at')), - 'view_count': int_or_none(stream.get('stream_current_viewers' if is_live else 'stream_total_viewers')), - 'is_live': is_live, - 'formats': formats, - 'subtitles': subtitles, - } - - def _get_container(self, url): - raise NotImplementedError('Subclass for get media container') - - def _get_media_url(self, media_meta): - raise NotImplementedError('Subclass for get media url') - - -class WASDTVStreamIE(WASDTVBaseIE): - IE_NAME = 'wasdtv:stream' - _VALID_URL = r'https?://wasd\.tv/(?P[^/#?]+)$' - _TESTS = [{ - 'url': 'https://wasd.tv/24_7', - 'info_dict': { - 'id': '559738', - 'ext': 'mp4', - 'title': 'Live 24/7 Music', - 'description': '24/7 Music', - 'timestamp': int, - 'upload_date': r're:^\d{8}$', - 'is_live': True, - 'view_count': int, - }, - }] - - def _get_container(self, url): - nickname = self._match_id(url) - channel = self._fetch(f'channels/nicknames/{nickname}', video_id=nickname, description='channel') - channel_id = channel.get('channel_id') - containers = self._fetch( - 'v2/media-containers', channel_id, 'running media containers', - query={ - 'channel_id': channel_id, - 'media_container_type': 'SINGLE', - 'media_container_status': 'RUNNING', - }) - if not containers: - raise ExtractorError(f'{nickname} is offline', expected=True) - return containers[0] - - def _get_media_url(self, media_meta): - return media_meta['media_url'], True - - -class WASDTVRecordIE(WASDTVBaseIE): - IE_NAME = 'wasdtv:record' - _VALID_URL = r'https?://wasd\.tv/[^/#?]+(?:/videos)?\?record=(?P\d+)$' - _TESTS = [{ - 'url': 'https://wasd.tv/spacemita/videos?record=907755', - 'md5': 'c9899dd85be4cc997816ff9f9ca516ce', - 'info_dict': { - 'id': '906825', - 'ext': 'mp4', - 'title': 'Музыкальный', - 'description': 'md5:f510388d929ff60ae61d4c3cab3137cc', - 'timestamp': 1645812079, - 'upload_date': '20220225', - 'thumbnail': r're:^https?://.+\.jpg', - 'is_live': False, - 'view_count': int, - }, - }, { - 'url': 'https://wasd.tv/spacemita?record=907755', - 'only_matching': True, - }] - - def _get_container(self, url): - container_id = self._match_id(url) - return self._fetch( - f'v2/media-containers/{container_id}', container_id, 'media container') - - def _get_media_url(self, media_meta): - media_archive_url = media_meta.get('media_archive_url') - if media_archive_url: - return media_archive_url, False - return media_meta['media_url'], True - - -class WASDTVClipIE(WASDTVBaseIE): - IE_NAME = 'wasdtv:clip' - _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P\d+)$' - _TESTS = [{ - 'url': 'https://wasd.tv/spacemita/clips?clip=26804', - 'md5': '818885e720143d7a4e776ff66fcff148', - 'info_dict': { - 'id': '26804', - 'ext': 'mp4', - 'title': 'Пуш флексит на голове стримера', - 'timestamp': 1646682908, - 'upload_date': '20220307', - 'thumbnail': r're:^https?://.+\.jpg', - 'view_count': int, - }, - }] - - def _real_extract(self, url): - clip_id = self._match_id(url) - clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip') - clip_data = clip.get('clip_data') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4') - return { - 'id': clip_id, - 'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)), - 'thumbnails': self._extract_thumbnails(clip_data.get('preview')), - 'timestamp': parse_iso8601(clip.get('created_at')), - 'view_count': int_or_none(clip.get('clip_views_count')), - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/zetland.py b/yt_dlp/extractor/zetland.py new file mode 100644 index 0000000000..055a643b3c --- /dev/null +++ b/yt_dlp/extractor/zetland.py @@ -0,0 +1,71 @@ +from .common import InfoExtractor +from ..utils import merge_dicts, unified_timestamp, url_or_none +from ..utils.traversal import traverse_obj + + +class ZetlandDKArticleIE(InfoExtractor): + _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P(?P\w{8})-(?P\w{8})-(?:\w{5}))' + _TESTS = [{ + 'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel', + 'info_dict': { + 'id': 'sO9aq2MY-a81VP3BY-66e69', + 'ext': 'mp3', + 'modified_date': '20240118', + 'title': 'Afsnit 1: “Det føltes som en kidnapning.” ', + 'upload_date': '20240116', + 'uploader_id': 'a81VP3BY', + 'modified_timestamp': 1705568739, + 'release_timestamp': 1705377592, + 'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY', + 'uploader': 'Helle Fuusager', + 'release_date': '20240116', + 'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg', + 'description': 'md5:9619d426772c133f5abb26db27f26a01', + 'timestamp': 1705377592, + 'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14', + } + + }] + + def _real_extract(self, url): + display_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') + webpage = self._download_webpage(url, display_id) + + next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps'] + story_data = traverse_obj(next_js_data, ('initialState', 'consume', 'story', 'story')) + + formats = [] + for audio_url in traverse_obj(story_data, ('story_content', 'meta', 'audioFiles', ..., {url_or_none})): + formats.append({ + 'url': audio_url, + 'vcodec': 'none', + }) + + return merge_dicts({ + 'id': display_id, + 'formats': formats, + 'uploader_id': uploader_id + }, traverse_obj(story_data, { + 'title': ((('story_content', 'content', 'title'), 'title'), {str}), + 'uploader': ('sharer', 'name'), + 'uploader_id': ('sharer', 'sharer_id'), + 'description': ('story_content', 'content', 'socialDescription'), + 'series_id': ('story_content', 'meta', 'seriesId'), + 'release_timestamp': ('published_at', {unified_timestamp}), + 'modified_timestamp': ('revised_at', {unified_timestamp}), + }, get_all=False), traverse_obj(next_js_data, ('metaInfo', { + 'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}), + 'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}), + 'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}), + 'uploader_url': ('ld', 'author', 'url', {url_or_none}), + 'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}), + 'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}), + 'release_timestamp': ('ld', 'datePublished', {unified_timestamp}), + 'timestamp': ('ld', 'dateCreated', {unified_timestamp}), + }), get_all=False), { + 'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage), + 'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage), + 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage), + 'uploader': self._html_search_meta(['author'], webpage), + 'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)), + }, self._search_json_ld(webpage, display_id, fatal=False))