Merge branch 'yt-dlp:master' into pr/fb_parsedata_error

This commit is contained in:
bashonly 2024-02-15 17:02:42 -06:00
commit 3302588c77
No known key found for this signature in database
GPG key ID: 783F096F253D15B0
43 changed files with 2765 additions and 1050 deletions

View file

@ -107,10 +107,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"
- uses: conda-incubator/setup-miniconda@v2 - uses: conda-incubator/setup-miniconda@v3
with: with:
miniforge-variant: Mambaforge miniforge-variant: Mambaforge
use-mamba: true use-mamba: true
@ -121,16 +121,14 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
sudo apt -y install zip pandoc man sed sudo apt -y install zip pandoc man sed
reqs=$(mktemp) cat > ./requirements.txt << EOF
cat > "$reqs" << EOF
python=3.10.* python=3.10.*
pyinstaller
cffi
brotli-python brotli-python
secretstorage
EOF EOF
sed -E '/^(brotli|secretstorage).*/d' requirements.txt >> "$reqs" python devscripts/install_deps.py --print \
mamba create -n build --file "$reqs" --exclude brotli --exclude brotlicffi \
--include secretstorage --include pyinstaller >> ./requirements.txt
mamba create -n build --file ./requirements.txt
- name: Prepare - name: Prepare
run: | run: |
@ -144,9 +142,9 @@ jobs:
run: | run: |
unset LD_LIBRARY_PATH # Harmful; set by setup-python unset LD_LIBRARY_PATH # Harmful; set by setup-python
conda activate build conda activate build
python pyinst.py --onedir python -m bundle.pyinstaller --onedir
(cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
python pyinst.py python -m bundle.pyinstaller
mv ./dist/yt-dlp_linux ./yt-dlp_linux mv ./dist/yt-dlp_linux ./yt-dlp_linux
mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
@ -164,13 +162,15 @@ jobs:
done done
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
yt-dlp yt-dlp
yt-dlp.tar.gz yt-dlp.tar.gz
yt-dlp_linux yt-dlp_linux
yt-dlp_linux.zip yt-dlp_linux.zip
compression-level: 0
linux_arm: linux_arm:
needs: process needs: process
@ -201,17 +201,18 @@ jobs:
dockerRunArgs: --volume "${PWD}/repo:/repo" dockerRunArgs: --volume "${PWD}/repo:/repo"
install: | # Installing Python 3.10 from the Deadsnakes repo raises errors install: | # Installing Python 3.10 from the Deadsnakes repo raises errors
apt update apt update
apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip
python3.8 -m pip install -U pip setuptools wheel python3.8 -m pip install -U pip setuptools wheel
# Cannot access requirements.txt from the repo directory at this stage # Cannot access any files from the repo directory at this stage
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi
run: | run: |
cd repo cd repo
python3.8 -m pip install -U Pyinstaller secretstorage -r requirements.txt # Cached version may be out of date python3.8 devscripts/install_deps.py -o --include build
python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date
python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python3.8 devscripts/make_lazy_extractors.py python3.8 devscripts/make_lazy_extractors.py
python3.8 pyinst.py python3.8 -m bundle.pyinstaller
if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
@ -224,10 +225,12 @@ jobs:
fi fi
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7 path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0
macos: macos:
needs: process needs: process
@ -240,9 +243,10 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
python3 -m pip install -U --user pip setuptools wheel python3 devscripts/install_deps.py --user -o --include build
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
# We need to ignore wheels otherwise we break universal2 builds # We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --user --no-binary :all: Pyinstaller -r requirements.txt python3 -m pip install -U --user --no-binary :all: -r requirements.txt
- name: Prepare - name: Prepare
run: | run: |
@ -250,9 +254,9 @@ jobs:
python3 devscripts/make_lazy_extractors.py python3 devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python3 pyinst.py --target-architecture universal2 --onedir python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
(cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
python3 pyinst.py --target-architecture universal2 python3 -m bundle.pyinstaller --target-architecture universal2
- name: Verify --update-to - name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION if: vars.UPDATE_TO_VERIFICATION
@ -265,11 +269,13 @@ jobs:
[[ "$version" != "$downgraded_version" ]] [[ "$version" != "$downgraded_version" ]]
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
dist/yt-dlp_macos dist/yt-dlp_macos
dist/yt-dlp_macos.zip dist/yt-dlp_macos.zip
compression-level: 0
macos_legacy: macos_legacy:
needs: process needs: process
@ -293,8 +299,8 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
python3 -m pip install -U --user pip setuptools wheel python3 devscripts/install_deps.py --user -o --include build
python3 -m pip install -U --user Pyinstaller -r requirements.txt python3 devscripts/install_deps.py --user --include pyinstaller
- name: Prepare - name: Prepare
run: | run: |
@ -302,7 +308,7 @@ jobs:
python3 devscripts/make_lazy_extractors.py python3 devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python3 pyinst.py python3 -m bundle.pyinstaller
mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy
- name: Verify --update-to - name: Verify --update-to
@ -316,10 +322,12 @@ jobs:
[[ "$version" != "$downgraded_version" ]] [[ "$version" != "$downgraded_version" ]]
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
dist/yt-dlp_macos_legacy dist/yt-dlp_macos_legacy
compression-level: 0
windows: windows:
needs: process needs: process
@ -328,13 +336,14 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: # 3.8 is used for Win7 support with: # 3.8 is used for Win7 support
python-version: "3.8" python-version: "3.8"
- name: Install Requirements - name: Install Requirements
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python -m pip install -U pip setuptools wheel py2exe python devscripts/install_deps.py -o --include build
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt python devscripts/install_deps.py --include py2exe
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -342,10 +351,10 @@ jobs:
python devscripts/make_lazy_extractors.py python devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python setup.py py2exe python -m bundle.py2exe
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
python pyinst.py python -m bundle.pyinstaller
python pyinst.py --onedir python -m bundle.pyinstaller --onedir
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
- name: Verify --update-to - name: Verify --update-to
@ -362,12 +371,14 @@ jobs:
} }
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
dist/yt-dlp.exe dist/yt-dlp.exe
dist/yt-dlp_min.exe dist/yt-dlp_min.exe
dist/yt-dlp_win.zip dist/yt-dlp_win.zip
compression-level: 0
windows32: windows32:
needs: process needs: process
@ -376,14 +387,15 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: with:
python-version: "3.8" python-version: "3.8"
architecture: "x86" architecture: "x86"
- name: Install Requirements - name: Install Requirements
run: | run: |
python -m pip install -U pip setuptools wheel python devscripts/install_deps.py -o --include build
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -391,7 +403,7 @@ jobs:
python devscripts/make_lazy_extractors.py python devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
python pyinst.py python -m bundle.pyinstaller
- name: Verify --update-to - name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION if: vars.UPDATE_TO_VERIFICATION
@ -407,10 +419,12 @@ jobs:
} }
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
dist/yt-dlp_x86.exe dist/yt-dlp_x86.exe
compression-level: 0
meta_files: meta_files:
if: inputs.meta_files && always() && !cancelled() if: inputs.meta_files && always() && !cancelled()
@ -424,7 +438,11 @@ jobs:
- windows32 - windows32
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/download-artifact@v3 - uses: actions/download-artifact@v4
with:
path: artifact
pattern: build-*
merge-multiple: true
- name: Make SHA2-SUMS files - name: Make SHA2-SUMS files
run: | run: |
@ -459,8 +477,10 @@ jobs:
done done
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }}
path: | path: |
SHA*SUMS*
_update_spec _update_spec
SHA*SUMS*
compression-level: 0

View file

@ -49,11 +49,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
continue-on-error: False continue-on-error: False
run: | run: |

View file

@ -11,11 +11,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python - name: Set up Python
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: 3.9 python-version: 3.9
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
continue-on-error: true continue-on-error: true
run: python3 ./devscripts/run_tests.py download run: python3 ./devscripts/run_tests.py download
@ -38,11 +38,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
continue-on-error: true continue-on-error: true
run: python3 ./devscripts/run_tests.py download run: python3 ./devscripts/run_tests.py download

View file

@ -11,11 +11,11 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python 3.8 - name: Set up Python 3.8
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: '3.8' python-version: '3.8'
- name: Install test requirements - name: Install test requirements
run: pip install pytest -r requirements.txt run: python3 ./devscripts/install_deps.py --include dev
- name: Run tests - name: Run tests
run: | run: |
python3 -m yt_dlp -v || true python3 -m yt_dlp -v || true
@ -26,10 +26,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
- name: Install flake8 - name: Install flake8
run: pip install flake8 run: python3 ./devscripts/install_deps.py -o --include dev
- name: Make lazy extractors - name: Make lazy extractors
run: python devscripts/make_lazy_extractors.py run: python3 ./devscripts/make_lazy_extractors.py
- name: Run flake8 - name: Run flake8
run: flake8 . run: flake8 .

View file

@ -6,8 +6,10 @@ on:
paths: paths:
- "yt_dlp/**.py" - "yt_dlp/**.py"
- "!yt_dlp/version.py" - "!yt_dlp/version.py"
- "setup.py" - "bundle/*.py"
- "pyinst.py" - "pyproject.toml"
- "Makefile"
- ".github/workflows/build.yml"
concurrency: concurrency:
group: release-master group: release-master
permissions: permissions:

View file

@ -18,7 +18,14 @@ jobs:
- name: Check for new commits - name: Check for new commits
id: check_for_new_commits id: check_for_new_commits
run: | run: |
relevant_files=("yt_dlp/*.py" ':!yt_dlp/version.py' "setup.py" "pyinst.py") relevant_files=(
"yt_dlp/*.py"
':!yt_dlp/version.py'
"bundle/*.py"
"pyproject.toml"
"Makefile"
".github/workflows/build.yml"
)
echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT" echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
release: release:

View file

@ -71,7 +71,7 @@ jobs:
with: with:
fetch-depth: 0 fetch-depth: 0
- uses: actions/setup-python@v4 - uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"
@ -246,15 +246,16 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v4 with:
fetch-depth: 0
- uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"
- name: Install Requirements - name: Install Requirements
run: | run: |
sudo apt -y install pandoc man sudo apt -y install pandoc man
python -m pip install -U pip setuptools wheel twine python devscripts/install_deps.py -o --include build
python -m pip install -U -r requirements.txt
- name: Prepare - name: Prepare
env: env:
@ -266,14 +267,19 @@ jobs:
run: | run: |
python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}" python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
python devscripts/make_lazy_extractors.py python devscripts/make_lazy_extractors.py
sed -i -E "s/(name=')[^']+(', # package name)/\1${{ env.pypi_project }}\2/" setup.py sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml
- name: Build - name: Build
run: | run: |
rm -rf dist/* rm -rf dist/*
make pypi-files make pypi-files
printf '%s\n\n' \
'Official repository: <https://github.com/yt-dlp/yt-dlp>' \
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new
cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel make clean-cache
python -m build --no-isolation .
- name: Publish to PyPI - name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
@ -290,8 +296,12 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- uses: actions/download-artifact@v3 - uses: actions/download-artifact@v4
- uses: actions/setup-python@v4 with:
path: artifact
pattern: build-*
merge-multiple: true
- uses: actions/setup-python@v5
with: with:
python-version: "3.10" python-version: "3.10"

View file

@ -1,10 +0,0 @@
include AUTHORS
include Changelog.md
include LICENSE
include README.md
include completions/*/*
include supportedsites.md
include yt-dlp.1
include requirements.txt
recursive-include devscripts *
recursive-include test *

View file

@ -6,11 +6,11 @@ doc: README.md CONTRIBUTING.md issuetemplates supportedsites
ot: offlinetest ot: offlinetest
tar: yt-dlp.tar.gz tar: yt-dlp.tar.gz
# Keep this list in sync with MANIFEST.in # Keep this list in sync with pyproject.toml includes/artifacts
# intended use: when building a source distribution, # intended use: when building a source distribution,
# make pypi-files && python setup.py sdist # make pypi-files && python3 -m build -sn .
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
completions yt-dlp.1 requirements.txt setup.cfg devscripts/* test/* completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites .PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
@ -21,7 +21,7 @@ clean-test:
*.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist: clean-dist:
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
clean-cache: clean-cache:
find . \( \ find . \( \
-type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
@ -38,11 +38,13 @@ MANDIR ?= $(PREFIX)/man
SHAREDIR ?= $(PREFIX)/share SHAREDIR ?= $(PREFIX)/share
PYTHON ?= /usr/bin/env python3 PYTHON ?= /usr/bin/env python3
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local # $(shell) and $(error) are no-ops in BSD Make and the != variable assignment operator is not supported by GNU Make <4.0
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) VERSION_CHECK != echo supported
VERSION_CHECK ?= $(error GNU Make 4+ or BSD Make is required)
CHECK_VERSION := $(VERSION_CHECK)
# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 # set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) MARKDOWN != if [ "`pandoc -v | head -n1 | cut -d' ' -f2 | head -c1`" -ge "2" ]; then echo markdown-smart; else echo markdown; fi
install: lazy-extractors yt-dlp yt-dlp.1 completions install: lazy-extractors yt-dlp yt-dlp.1 completions
mkdir -p $(DESTDIR)$(BINDIR) mkdir -p $(DESTDIR)$(BINDIR)
@ -73,24 +75,24 @@ test:
offlinetest: codetest offlinetest: codetest
$(PYTHON) -m pytest -k "not download" $(PYTHON) -m pytest -k "not download"
# XXX: This is hard to maintain CODE_FOLDERS != find yt_dlp -type f -name '__init__.py' -exec dirname {} \+ | grep -v '/__' | sort
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking CODE_FILES != for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py yt-dlp: $(CODE_FILES)
mkdir -p zip mkdir -p zip
for d in $(CODE_FOLDERS) ; do \ for d in $(CODE_FOLDERS) ; do \
mkdir -p zip/$$d ;\ mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\ cp -pPR $$d/*.py zip/$$d/ ;\
done done
touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py (cd zip && touch -t 200001010101 $(CODE_FILES))
mv zip/yt_dlp/__main__.py zip/ mv zip/yt_dlp/__main__.py zip/
cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py (cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
rm -rf zip rm -rf zip
echo '#!$(PYTHON)' > yt-dlp echo '#!$(PYTHON)' > yt-dlp
cat yt-dlp.zip >> yt-dlp cat yt-dlp.zip >> yt-dlp
rm yt-dlp.zip rm yt-dlp.zip
chmod a+x yt-dlp chmod a+x yt-dlp
README.md: yt_dlp/*.py yt_dlp/*/*.py devscripts/make_readme.py README.md: $(CODE_FILES) devscripts/make_readme.py
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
CONTRIBUTING.md: README.md devscripts/make_contributing.py CONTRIBUTING.md: README.md devscripts/make_contributing.py
@ -115,19 +117,19 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1 pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
rm -f yt-dlp.1.temp.md rm -f yt-dlp.1.temp.md
completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
mkdir -p completions/bash mkdir -p completions/bash
$(PYTHON) devscripts/bash-completion.py $(PYTHON) devscripts/bash-completion.py
completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
mkdir -p completions/zsh mkdir -p completions/zsh
$(PYTHON) devscripts/zsh-completion.py $(PYTHON) devscripts/zsh-completion.py
completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
mkdir -p completions/fish mkdir -p completions/fish
$(PYTHON) devscripts/fish-completion.py $(PYTHON) devscripts/fish-completion.py
_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py') _EXTRACTOR_FILES != find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
$(PYTHON) devscripts/make_lazy_extractors.py $@ $(PYTHON) devscripts/make_lazy_extractors.py $@
@ -141,15 +143,12 @@ yt-dlp.tar.gz: all
--exclude '__pycache__' \ --exclude '__pycache__' \
--exclude '.pytest_cache' \ --exclude '.pytest_cache' \
--exclude '.git' \ --exclude '.git' \
--exclude '__pyinstaller' \
-- \ -- \
README.md supportedsites.md Changelog.md LICENSE \ README.md supportedsites.md Changelog.md LICENSE \
CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \ CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
Makefile MANIFEST.in yt-dlp.1 README.txt completions \ Makefile yt-dlp.1 README.txt completions .gitignore \
setup.py setup.cfg yt-dlp yt_dlp requirements.txt \ setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test
devscripts test
AUTHORS: .mailmap AUTHORS:
git shortlog -s -n | cut -f2 | sort > AUTHORS git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS
.mailmap:
git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap

View file

@ -321,19 +321,21 @@ ### Deprecated
## COMPILE ## COMPILE
### Standalone PyInstaller Builds ### Standalone PyInstaller Builds
To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. You can run the following commands:
python3 -m pip install -U pyinstaller -r requirements.txt ```
python3 devscripts/make_lazy_extractors.py python3 devscripts/install_deps.py --include pyinstaller
python3 pyinst.py python3 devscripts/make_lazy_extractors.py
python3 -m bundle.pyinstaller
```
On some systems, you may need to use `py` or `python` instead of `python3`. On some systems, you may need to use `py` or `python` instead of `python3`.
`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). `bundle/pyinstaller.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. **Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. **Important**: Running `pyinstaller` directly **without** using `bundle/pyinstaller.py` is **not** officially supported. This may or may not work correctly.
### Platform-independent Binary (UNIX) ### Platform-independent Binary (UNIX)
You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
@ -346,14 +348,17 @@ ### Standalone Py2Exe Builds (Windows)
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run. While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run.
If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe` If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
py -m pip install -U py2exe -r requirements.txt ```
py devscripts/make_lazy_extractors.py py devscripts/install_deps.py --include py2exe
py setup.py py2exe py devscripts/make_lazy_extractors.py
py -m bundle.py2exe
```
### Related scripts ### Related scripts
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
* **`devscripts/update-version.py`** - Update the version number based on current date. * **`devscripts/update-version.py`** - Update the version number based on current date.
* **`devscripts/set-variant.py`** - Set the build variant of the executable. * **`devscripts/set-variant.py`** - Set the build variant of the executable.
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.

1
bundle/__init__.py Normal file
View file

@ -0,0 +1 @@
# Empty file

59
bundle/py2exe.py Executable file
View file

@ -0,0 +1,59 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import warnings
from py2exe import freeze
from devscripts.utils import read_version
VERSION = read_version()
def main():
    """Build the py2exe variant of the yt-dlp Windows executable.

    Warns that py2exe builds are the less capable option and points at the
    PyInstaller bundler, then hands the build configuration to
    ``py2exe.freeze``. The frozen console script is ``./yt_dlp/__main__.py``,
    written with base name ``yt-dlp`` into ``./dist``.

    Returns:
        Whatever ``py2exe.freeze`` returns.
    """
    # The PyInstaller entry point now lives at bundle/pyinstaller.py
    # (formerly pyinst.py), so the hint must name the new location.
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "bundle/pyinstaller.py" to build using pyinstaller instead')

    return freeze(
        console=[{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        version_info={
            # The same version string is used for both the file and the product version
            'version': VERSION,
            'description': 'A youtube-dl fork with additional features and patches',
            'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
            'product_name': 'yt-dlp',
            'product_version': VERSION,
        },
        options={
            'bundle_files': 0,
            'compressed': 1,
            'optimize': 2,
            'dist_dir': './dist',
            'excludes': [
                # py2exe cannot import Crypto
                'Crypto',
                'Cryptodome',
                # py2exe appears to confuse this with our socks library.
                # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
                'urllib3.contrib.socks'
            ],
            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
            # Modules that are only imported dynamically must be added here
            'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                         'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
        },
        zipfile=None,
    )


if __name__ == '__main__':
    main()

2
pyinst.py → bundle/pyinstaller.py Normal file → Executable file
View file

@ -4,7 +4,7 @@
import os import os
import sys import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import platform import platform

66
devscripts/install_deps.py Executable file
View file

@ -0,0 +1,66 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import argparse
import re
import subprocess
from devscripts.tomlparse import parse_toml
from devscripts.utils import read_file
def parse_args():
    """Parse the command line of the dependency installer.

    Returns:
        An ``argparse.Namespace`` with the attributes ``input``, ``exclude``,
        ``include``, ``only_optional``, ``print`` and ``user``.
    """
    parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
    parser.add_argument(
        'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml',
        help='Input file (default: %(default)s)')

    # Repeatable options: every occurrence is appended (None when never given)
    for short, long, metavar, description in (
        ('-e', '--exclude', 'REQUIREMENT', 'Exclude a required dependency'),
        ('-i', '--include', 'GROUP', 'Include an optional dependency group'),
    ):
        parser.add_argument(short, long, metavar=metavar, action='append', help=description)

    # Plain on/off switches
    for short, long, description in (
        ('-o', '--only-optional', 'Only install optional dependencies'),
        ('-p', '--print', 'Only print a requirements.txt to stdout'),
        ('-u', '--user', 'Install with pip as --user'),
    ):
        parser.add_argument(short, long, action='store_true', help=description)

    return parser.parse_args()
def main():
    """Work out the requested dependency set, then print or install it.

    Starts from ``project.dependencies`` of the input TOML file (unless
    ``--only-optional`` is given), drops every ``--exclude`` match, and adds
    the members of each ``--include`` optional group.

    Returns:
        None when ``--print`` is given, otherwise the exit status of pip.
    """
    args = parse_args()
    project = parse_toml(read_file(args.input))['project']
    deps = project['dependencies']

    targets = deps.copy() if not args.only_optional else []

    for exclude in args.exclude or []:
        for dep in deps:
            # Leading name of the requirement, without version specifiers or markers
            simplified_dep = re.match(r'[\w-]+', dep)[0]
            if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep):
                targets.remove(dep)

    # A project without an optional-dependencies table has nothing to include
    optional_deps = project.get('optional-dependencies') or {}

    for include in args.include or []:
        group = optional_deps.get(include)
        if group is None:
            # Report typos instead of silently installing less than requested.
            # Written to stderr so that --print output stays a clean requirements list
            print(f'WARNING: Unknown optional dependency group: {include!r}', file=sys.stderr)
            continue
        targets.extend(group)

    if args.print:
        for target in targets:
            print(target)
        return

    pip_args = [sys.executable, '-m', 'pip', 'install', '-U']
    if args.user:
        pip_args.append('--user')
    pip_args.extend(targets)

    return subprocess.call(pip_args)
if __name__ == '__main__':
sys.exit(main())

189
devscripts/tomlparse.py Executable file
View file

@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Simple parser for spec compliant toml files
A simple toml parser for files that comply with the spec.
Should only be used to parse `pyproject.toml` for `install_deps.py`.
IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
"""
from __future__ import annotations
import datetime
import json
import re
WS = r'(?:[\ \t]*)'
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
EQUALS_RE = re.compile(rf'={WS}')
WS_RE = re.compile(WS)
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
def parse_key(value: str):
    """Yield every component of a (possibly dotted) key with its quotes removed.

    Double-quoted components are decoded with ``json.loads``; single-quoted
    components only lose their surrounding quotes; bare components are
    yielded unchanged.
    """
    for part in SINGLE_KEY_RE.finditer(value):
        text = part[0]
        quote = text[0]
        if quote == '"':
            yield json.loads(text)
        elif quote == '\'':
            yield text[1:-1]
        else:
            yield text
def get_target(root: dict, paths: list[str], is_list=False):
    """Walk ``paths`` down from ``root`` and return the table found at the end.

    Missing keys are created on the way. When ``is_list`` is true the final
    key refers to a list of tables: a new empty table is appended to that list
    and returned. A list met earlier on the path is entered through its last
    element.

    Returns:
        The dict addressed by ``paths`` (``root`` itself for an empty path).
    """
    node = root
    last = len(paths)

    for position, key in enumerate(paths, 1):
        wants_list = is_list and position == last

        child = node.get(key)
        if child is None:
            child = node[key] = [] if wants_list else {}

        if isinstance(child, dict):
            node = child
            continue

        if wants_list:
            # Start a fresh table at the end of the list
            node = {}
            child.append(node)
        else:
            # Descend into the most recently added table of the list
            node = child[-1]

    assert isinstance(node, dict)
    return node
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Drive the parsing of a bracketed, comma separated collection.

    This is a generator used as a coroutine. ``index`` points at the opening
    bracket. While an element is pending it yields ``(True, element_start)``
    and expects the caller to ``send`` back the index just past the element
    it parsed. Once the closing character ``end`` is reached it yields
    ``(False, index_after_end)``.
    """
    def skip_ws(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening bracket and any leading whitespace
    index = skip_ws(index + 1)

    while data[index] != end:
        resumed_at = yield True, index
        index = skip_ws(resumed_at)
        if data[index] == ',':
            index = skip_ws(index + 1)

    assert data[index] == end
    yield False, index + 1
def parse_value(data: str, index: int):
    """Parse the value that starts at ``data[index]``.

    Handles arrays, inline tables, single-line strings and bare scalars.
    Bare scalars are tried as int, float, time, date, datetime and finally
    as the literals ``true``/``false``.

    Returns:
        A tuple ``(end_index, value)``.
    """
    opener = data[index]

    if opener == '[':
        items = []
        cursor = parse_enclosed(data, index, ']', LIST_WS_RE)
        more, index = next(cursor)
        while more:
            index, item = parse_value(data, index)
            items.append(item)
            more, index = cursor.send(index)
        return index, items

    if opener == '{':
        table = {}
        cursor = parse_enclosed(data, index, '}', WS_RE)
        more, index = next(cursor)
        while more:
            more, index = cursor.send(parse_kv_pair(data, index, table))
        return index, table

    quoted = STRING_RE.match(data, index)
    if quoted:
        text = quoted[0]
        # Double-quoted strings are decoded with json.loads; single-quoted ones are taken verbatim
        return quoted.end(), json.loads(text) if text[0] == '"' else text[1:-1]

    bare = LEFTOVER_VALUE_RE.match(data, index)
    assert bare
    value = bare[0].strip()

    converters = (
        int,
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        # Never raises: anything that is not `true`/`false` becomes None
        {'true': True, 'false': False}.get,
    )
    for convert in converters:
        try:
            value = convert(value)
        except Exception:
            continue
        break

    return bare.end(), value
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one ``key = value`` pair at ``index`` and store it in ``target``.

    Dotted keys create (or reuse) the intermediate tables below ``target``.

    Returns:
        The index just past the parsed value, or None when no key starts at
        ``index``.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    *parents, leaf = parse_key(key_match[0])

    equals = EQUALS_RE.match(data, key_match.end())
    assert equals

    end, value = parse_value(data, equals.end())
    get_target(target, parents)[leaf] = value
    return end
def parse_toml(data: str):
    """Parse the text of a TOML document into nested dicts and lists.

    The text is scanned for table headers (``[a.b]`` / ``[[a.b]]``) and
    ``key = value`` lines. A header switches the table that the following
    pairs are stored in.

    Returns:
        The root table as a dict.
    """
    document = {}
    table = document
    position = 0

    while (found := EXPRESSION_RE.search(data, position)):
        if found.group('subtable'):
            position = found.end()
            path, is_list = found.group('path', 'is_list')
            table = get_target(document, list(parse_key(path)), bool(is_list))
        else:
            position = parse_kv_pair(data, found.start(), table)
            assert position is not None

    return document
def main():
    """Command-line entry point: read a TOML file and print it as JSON."""
    import argparse
    from pathlib import Path

    def default(obj):
        # JSON has no date/time types, so emit them as ISO 8601 strings
        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return obj.isoformat()

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    options = parser.parse_args()

    data = options.infile.read_text(encoding='utf-8')
    print(json.dumps(parse_toml(data), default=default))
if __name__ == '__main__':
main()

View file

@ -1,5 +1,120 @@
[build-system] [build-system]
build-backend = 'setuptools.build_meta' requires = ["hatchling"]
# https://github.com/yt-dlp/yt-dlp/issues/5941 build-backend = "hatchling.build"
# https://github.com/pypa/distutils/issues/17
requires = ['setuptools > 50'] [project]
name = "yt-dlp"
maintainers = [
{name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
{name = "Grub4K", email = "contact@grub4k.xyz"},
{name = "bashonly", email = "bashonly@protonmail.com"},
]
description = "A youtube-dl fork with additional features and patches"
readme = "README.md"
requires-python = ">=3.8"
keywords = [
"youtube-dl",
"video-downloader",
"youtube-downloader",
"sponsorblock",
"youtube-dlc",
"yt-dlp",
]
license = {file = "LICENSE"}
classifiers = [
"Topic :: Multimedia :: Video",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"Programming Language :: Python",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: Implementation",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"License :: OSI Approved :: The Unlicense (Unlicense)",
"Operating System :: OS Independent",
]
dynamic = ["version"]
dependencies = [
"brotli; implementation_name=='cpython'",
"brotlicffi; implementation_name!='cpython'",
"certifi",
"mutagen",
"pycryptodomex",
"requests>=2.31.0,<3",
"urllib3>=1.26.17,<3",
"websockets>=12.0",
]
[project.optional-dependencies]
secretstorage = [
"cffi",
"secretstorage",
]
build = [
"build",
"hatchling",
"pip",
"wheel",
]
dev = [
"flake8",
"isort",
"pytest",
]
pyinstaller = ["pyinstaller>=6.3"]
py2exe = ["py2exe>=0.12"]
[project.urls]
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
Repository = "https://github.com/yt-dlp/yt-dlp"
Tracker = "https://github.com/yt-dlp/yt-dlp/issues"
Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators"
[project.scripts]
yt-dlp = "yt_dlp:main"
[project.entry-points.pyinstaller40]
hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs"
[tool.hatch.build.targets.sdist]
include = [
"/yt_dlp",
"/devscripts",
"/test",
"/.gitignore", # included by default, needed for auto-excludes
"/Changelog.md",
"/LICENSE", # included as license
"/pyproject.toml", # included by default
"/README.md", # included as readme
"/setup.cfg",
"/supportedsites.md",
]
exclude = ["/yt_dlp/__pyinstaller"]
artifacts = [
"/yt_dlp/extractor/lazy_extractors.py",
"/completions",
"/AUTHORS", # included by default
"/README.txt",
"/yt-dlp.1",
]
[tool.hatch.build.targets.wheel]
packages = ["yt_dlp"]
exclude = ["/yt_dlp/__pyinstaller"]
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
[tool.hatch.build.targets.wheel.shared-data]
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp"
"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish"
"README.txt" = "share/doc/yt_dlp/README.txt"
"yt-dlp.1" = "share/man/man1/yt-dlp.1"
[tool.hatch.version]
path = "yt_dlp/version.py"
pattern = "_pkg_version = '(?P<version>[^']+)'"

View file

@ -1,8 +0,0 @@
mutagen
pycryptodomex
brotli; implementation_name=='cpython'
brotlicffi; implementation_name!='cpython'
certifi
requests>=2.31.0,<3
urllib3>=1.26.17,<3
websockets>=12.0

View file

@ -1,7 +1,3 @@
[wheel]
universal = true
[flake8] [flake8]
exclude = build,venv,.tox,.git,.pytest_cache exclude = build,venv,.tox,.git,.pytest_cache
ignore = E402,E501,E731,E741,W503 ignore = E402,E501,E731,E741,W503

183
setup.py
View file

@ -1,183 +0,0 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import subprocess
import warnings
try:
from setuptools import Command, find_packages, setup
setuptools_available = True
except ImportError:
from distutils.core import Command, setup
setuptools_available = False
from devscripts.utils import read_file, read_version
VERSION = read_version(varname='_pkg_version')
DESCRIPTION = 'A youtube-dl fork with additional features and patches'
LONG_DESCRIPTION = '\n\n'.join((
'Official repository: <https://github.com/yt-dlp/yt-dlp>',
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
read_file('README.md')))
REQUIREMENTS = read_file('requirements.txt').splitlines()
def packages():
    """Return the list of packages to distribute.

    Uses ``find_packages`` when setuptools was imported; otherwise falls back
    to a fixed list of yt_dlp packages.
    """
    if not setuptools_available:
        return [
            'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
        ]

    excluded = ('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts')
    return find_packages(exclude=excluded)
def py2exe_params():
    """Build the keyword arguments for a py2exe build.

    Emits a warning about the limitations of py2exe builds first.

    Returns:
        A dict with the keys ``console``, ``version_info``, ``options`` and
        ``zipfile``.
    """
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

    console = [{
        'script': './yt_dlp/__main__.py',
        'dest_base': 'yt-dlp',
        'icon_resources': [(1, 'devscripts/logo.ico')],
    }]

    version_info = {
        'version': VERSION,
        'description': DESCRIPTION,
        # Only the first line of the long description is used
        'comments': LONG_DESCRIPTION.split('\n')[0],
        'product_name': 'yt-dlp',
        'product_version': VERSION,
    }

    options = {
        'bundle_files': 0,
        'compressed': 1,
        'optimize': 2,
        'dist_dir': './dist',
        'excludes': [
            # py2exe cannot import Crypto
            'Crypto',
            'Cryptodome',
            # py2exe appears to confuse this with our socks library.
            # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
            'urllib3.contrib.socks'
        ],
        'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
        # Modules that are only imported dynamically must be added here
        'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                     'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
    }

    return {
        'console': console,
        'version_info': version_info,
        'options': options,
        'zipfile': None,
    }
def build_params():
    """Build the keyword arguments for a regular (non-py2exe) ``setup`` call.

    Collects the data files that exist on disk, warning about each missing
    one, and adds either entry points (with setuptools) or a plain script
    (without).

    Returns:
        A dict with ``data_files`` plus ``entry_points`` or ``scripts``.
    """
    files_spec = [
        ('share/bash-completion/completions', ['completions/bash/yt-dlp']),
        ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']),
        ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']),
        ('share/doc/yt_dlp', ['README.txt']),
        ('share/man/man1', ['yt-dlp.1'])
    ]

    data_files = []
    for dirname, files in files_spec:
        resfiles = []
        for fn in files:
            if os.path.exists(fn):
                resfiles.append(fn)
                continue
            warnings.warn(f'Skipping file {fn} since it is not present. Try running " make pypi-files " first')
        # The directory is listed even when none of its files are present
        data_files.append((dirname, resfiles))

    if setuptools_available:
        extra = {
            'entry_points': {
                'console_scripts': ['yt-dlp = yt_dlp:main'],
                'pyinstaller40': ['hook-dirs = yt_dlp.__pyinstaller:get_hook_dirs'],
            },
        }
    else:
        extra = {'scripts': ['yt-dlp']}

    return {'data_files': data_files, **extra}
class build_lazy_extractors(Command):
    """Setup command that runs ``devscripts/make_lazy_extractors.py``."""

    description = 'Build the extractor lazy loading module'
    user_options = []

    def initialize_options(self):
        """This command takes no options, so there is nothing to initialise."""

    def finalize_options(self):
        """This command takes no options, so there is nothing to finalise."""

    def run(self):
        """Run the generator script, unless this is a dry run."""
        if not self.dry_run:
            subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
            return
        print('Skipping build of lazy extractors in dry run mode')
def main():
    """Entry point of the setup script.

    With ``py2exe`` as the first command-line argument the executable is
    frozen: a py2exe that provides ``freeze`` is called directly and its
    result returned, while an older py2exe gets its parameters reshaped and
    goes through ``setup``. Any other invocation is a regular ``setup`` call.
    """
    if sys.argv[1:2] != ['py2exe']:
        params = build_params()
    else:
        params = py2exe_params()
        try:
            from py2exe import freeze
        except ImportError:
            import py2exe  # noqa: F401
            warnings.warn('You are using an outdated version of py2exe. Support for this version will be removed in the future')
            # Reshape the parameters for the setup()-based build of the old py2exe
            params['console'][0].update(params.pop('version_info'))
            params['options'] = {'py2exe': params.pop('options')}
        else:
            return freeze(**params)

    project_urls = {
        'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
        'Source': 'https://github.com/yt-dlp/yt-dlp',
        'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
        'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
    }
    classifiers = [
        'Topic :: Multimedia :: Video',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: Implementation',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy',
        'License :: Public Domain',
        'Operating System :: OS Independent',
    ]

    setup(
        name='yt-dlp',  # package name (do not change/remove comment)
        version=VERSION,
        maintainer='pukkandan',
        maintainer_email='pukkandan.ytdlp@gmail.com',
        description=DESCRIPTION,
        long_description=LONG_DESCRIPTION,
        long_description_content_type='text/markdown',
        url='https://github.com/yt-dlp/yt-dlp',
        packages=packages(),
        install_requires=REQUIREMENTS,
        python_requires='>=3.8',
        project_urls=project_urls,
        classifiers=classifiers,
        cmdclass={'build_lazy_extractors': build_lazy_extractors},
        **params
    )
main()

View file

@ -257,6 +257,7 @@
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE
from .boosty import BoostyIE
from .bostonglobe import BostonGlobeIE from .bostonglobe import BostonGlobeIE
from .box import BoxIE from .box import BoxIE
from .boxcast import BoxCastVideoIE from .boxcast import BoxCastVideoIE
@ -1247,7 +1248,10 @@
NexxIE, NexxIE,
NexxEmbedIE, NexxEmbedIE,
) )
from .nfb import NFBIE from .nfb import (
NFBIE,
NFBSeriesIE,
)
from .nfhsnetwork import NFHSNetworkIE from .nfhsnetwork import NFHSNetworkIE
from .nfl import ( from .nfl import (
NFLIE, NFLIE,
@ -1284,6 +1288,7 @@
NicovideoTagURLIE, NicovideoTagURLIE,
NiconicoLiveIE, NiconicoLiveIE,
) )
from .ninaprotocol import NinaProtocolIE
from .ninecninemedia import ( from .ninecninemedia import (
NineCNineMediaIE, NineCNineMediaIE,
CPTwentyFourIE, CPTwentyFourIE,
@ -1348,6 +1353,12 @@
NYTimesIE, NYTimesIE,
NYTimesArticleIE, NYTimesArticleIE,
NYTimesCookingIE, NYTimesCookingIE,
NYTimesCookingRecipeIE,
)
from .nuum import (
NuumLiveIE,
NuumTabIE,
NuumMediaIE,
) )
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .nzherald import NZHeraldIE from .nzherald import NZHeraldIE
@ -1390,6 +1401,7 @@
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,
ORFFM4StoryIE, ORFFM4StoryIE,
ORFONIE,
ORFRadioIE, ORFRadioIE,
ORFPodcastIE, ORFPodcastIE,
ORFIPTVIE, ORFIPTVIE,
@ -1514,7 +1526,7 @@
PuhuTVSerieIE, PuhuTVSerieIE,
) )
from .pr0gramm import Pr0grammIE from .pr0gramm import Pr0grammIE
from .prankcast import PrankCastIE from .prankcast import PrankCastIE, PrankCastPostIE
from .premiershiprugby import PremiershipRugbyIE from .premiershiprugby import PremiershipRugbyIE
from .presstv import PressTVIE from .presstv import PressTVIE
from .projectveritas import ProjectVeritasIE from .projectveritas import ProjectVeritasIE
@ -2309,11 +2321,6 @@
WashingtonPostIE, WashingtonPostIE,
WashingtonPostArticleIE, WashingtonPostArticleIE,
) )
from .wasdtv import (
WASDTVStreamIE,
WASDTVRecordIE,
WASDTVClipIE,
)
from .wat import WatIE from .wat import WatIE
from .wdr import ( from .wdr import (
WDRIE, WDRIE,
@ -2492,6 +2499,7 @@
Zee5SeriesIE, Zee5SeriesIE,
) )
from .zeenews import ZeeNewsIE from .zeenews import ZeeNewsIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE from .zhihu import ZhihuIE
from .zingmp3 import ( from .zingmp3 import (
ZingMp3IE, ZingMp3IE,

View file

@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron', 'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
'md5': '294f18331bb516539d72d85a82887dcc', 'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
'info_dict': { 'info_dict': {
'id': '_xvg/m_cmbatw=', 'id': '_xvg/m_cmbatw=',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411', 'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
'timestamp': 1603092840, 'timestamp': 1666166520,
'upload_date': '20201019', 'upload_date': '20221019',
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg', 'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
}, },
}, { }, {
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn', 'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE):
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
_API_PATH = '/news/templates/data/jsonPlayer' _API_PATH = '/templates/data/jsonPlayer'
_TESTS = [{ _TESTS = [{
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377', 'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',

View file

@ -7,6 +7,7 @@
import re import re
import time import time
import urllib.parse import urllib.parse
import uuid
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome from ..dependencies import Cryptodome
@ -1304,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'upload_date': '20211127', 'upload_date': '20211127',
}, },
'playlist_mincount': 513, 'playlist_mincount': 513,
}, {
'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
'info_dict': {
'id': 'BV1DU4y1r7tz',
'ext': 'mp4',
'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
'upload_date': '20220820',
'description': '',
'timestamp': 1661016330,
'uploader_id': '1958703906',
'uploader': '靡烟miya',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'duration': 9552.903,
'tags': list,
'comment_count': int,
'view_count': int,
'like_count': int,
'_old_archive_ids': ['bilibili 687146339_part1'],
},
'params': {'noplaylist': True},
}, { }, {
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1', 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
'info_dict': { 'info_dict': {
@ -1355,6 +1376,11 @@ def _extract_medialist(self, query, list_id):
def _real_extract(self, url): def _real_extract(self, url):
list_id = self._match_id(url) list_id = self._match_id(url)
bvid = traverse_obj(parse_qs(url), ('bvid', 0))
if not self._yes_playlist(list_id, bvid):
return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
webpage = self._download_webpage(url, list_id) webpage = self._download_webpage(url, list_id)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id) initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200: if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
@ -1464,8 +1490,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
IE_DESC = 'Bilibili video search' IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000 _MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch' _SEARCH_KEY = 'bilisearch'
_TESTS = [{
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'playlist_count': 3,
'info_dict': {
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
},
'playlist': [{
'info_dict': {
'id': 'BV1n44y1Q7sc',
'ext': 'mp4',
'title': '“出道一年我怎么还在等你单推的女人睡觉后开播啊”【一分钟了解靡烟miya】',
'timestamp': 1669889987,
'upload_date': '20221201',
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
'tags': list,
'uploader': '靡烟miya',
'duration': 123.156,
'uploader_id': '1958703906',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 988222410_part1'],
},
}],
}]
def _search_results(self, query): def _search_results(self, query):
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
for page_num in itertools.count(1): for page_num in itertools.count(1):
videos = self._download_json( videos = self._download_json(
'https://api.bilibili.com/x/web-interface/search/type', query, 'https://api.bilibili.com/x/web-interface/search/type', query,

209
yt_dlp/extractor/boosty.py Normal file
View file

@ -0,0 +1,209 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
ExtractorError,
int_or_none,
qualities,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class BoostyIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P<user>[^/#?]+)/posts/(?P<post_id>[^/#?]+)'
_TESTS = [{
# single ok_video
'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
'info_dict': {
'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
'title': 'phasma_3',
'channel': 'Kuplinov',
'channel_id': '7958701',
'timestamp': 1655031975,
'upload_date': '20220612',
'release_timestamp': 1655049000,
'release_date': '20220612',
'modified_timestamp': 1668680993,
'modified_date': '20221117',
'tags': ['куплинов', 'phasmophobia'],
'like_count': int,
'ext': 'mp4',
'duration': 105,
'view_count': int,
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
},
}, {
# multiple ok_video
'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f',
'info_dict': {
'id': '0c652798-3b35-471f-8b48-a76a0b28736f',
'title': 'то что не пропустил юта6',
'channel': 'Илья Давыдов',
'channel_id': '6808257',
'timestamp': 1694017040,
'upload_date': '20230906',
'release_timestamp': 1694017040,
'release_date': '20230906',
'modified_timestamp': 1694071178,
'modified_date': '20230907',
'like_count': int,
},
'playlist_count': 3,
'playlist': [{
'info_dict': {
'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a',
'title': 'то что не пропустил юта6',
'channel': 'Илья Давыдов',
'channel_id': '6808257',
'timestamp': 1694017040,
'upload_date': '20230906',
'release_timestamp': 1694017040,
'release_date': '20230906',
'modified_timestamp': 1694071178,
'modified_date': '20230907',
'like_count': int,
'ext': 'mp4',
'duration': 31204,
'view_count': int,
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
},
}, {
'info_dict': {
'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7',
'title': 'то что не пропустил юта6',
'channel': 'Илья Давыдов',
'channel_id': '6808257',
'timestamp': 1694017040,
'upload_date': '20230906',
'release_timestamp': 1694017040,
'release_date': '20230906',
'modified_timestamp': 1694071178,
'modified_date': '20230907',
'like_count': int,
'ext': 'mp4',
'duration': 25704,
'view_count': int,
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
},
}, {
'info_dict': {
'id': '4a3bba32-78c8-422a-9432-2791aff60b42',
'title': 'то что не пропустил юта6',
'channel': 'Илья Давыдов',
'channel_id': '6808257',
'timestamp': 1694017040,
'upload_date': '20230906',
'release_timestamp': 1694017040,
'release_date': '20230906',
'modified_timestamp': 1694071178,
'modified_date': '20230907',
'like_count': int,
'ext': 'mp4',
'duration': 31867,
'view_count': int,
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
},
}],
}, {
# single external video (youtube)
'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
'info_dict': {
'id': 'EXelTnve5lY',
'title': 'Послание Президента Федеральному Собранию | Класс народа',
'upload_date': '20210425',
'channel': 'Денис Чужой',
'tags': 'count:10',
'like_count': int,
'ext': 'mp4',
'duration': 816,
'view_count': int,
'thumbnail': r're:^https://i\.ytimg\.com/',
'age_limit': 0,
'availability': 'public',
'categories': list,
'channel_follower_count': int,
'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
'channel_is_verified': bool,
'channel_url': r're:^https://www\.youtube\.com/',
'comment_count': int,
'description': str,
'heatmap': 'count:100',
'live_status': str,
'playable_in_embed': bool,
'uploader': str,
'uploader_id': str,
'uploader_url': r're:^https://www\.youtube\.com/',
},
}]
_MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
def _extract_formats(self, player_urls, video_id):
    """Convert the player URL entries of a video into a list of formats.

    Entries without a valid ``url`` are skipped. HLS and DASH manifests are
    expanded non-fatally, types listed in ``_MP4_TYPES`` become direct mp4
    formats, and any other type is reported with a warning.
    """
    hls_types = ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls')
    dash_types = ('dash', 'dash_live', 'live_playback_dash')
    quality = qualities(self._MP4_TYPES)

    formats = []
    for entry in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])):
        media_url = entry['url']
        kind = entry.get('type')

        if kind in hls_types:
            formats.extend(self._extract_m3u8_formats(media_url, video_id, m3u8_id='hls', fatal=False))
            continue
        if kind in dash_types:
            formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash', fatal=False))
            continue
        if kind not in self._MP4_TYPES:
            self.report_warning(f'Unknown format type: {kind!r}')
            continue

        formats.append({
            'url': media_url,
            'ext': 'mp4',
            'format_id': kind,
            'quality': quality(kind),
        })

    return formats
def _real_extract(self, url):
    """Extract the video(s) attached to a Boosty post.

    Returns the single entry when the post has exactly one video, otherwise
    a playlist of all entries. Raises ExtractorError when the post has none.
    """
    user, post_id = self._match_valid_url(url).group('user', 'post_id')

    post = self._download_json(
        f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
        note='Downloading post data', errnote='Unable to download post data')

    post_title = post.get('title')
    if not post_title:
        self.report_warning('Unable to extract post title. Falling back to parsing html page')
        webpage = self._download_webpage(url, video_id=post_id)
        post_title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

    # Post-level metadata shared by every entry (and by the playlist itself)
    common_metadata = {'title': post_title}
    common_metadata.update(traverse_obj(post, {
        'channel': ('user', 'name', {str}),
        'channel_id': ('user', 'id', {str_or_none}),
        'timestamp': ('createdAt', {int_or_none}),
        'release_timestamp': ('publishTime', {int_or_none}),
        'modified_timestamp': ('updatedAt', {int_or_none}),
        'tags': ('tags', ..., 'title', {str}),
        'like_count': ('count', 'likes', {int_or_none}),
    }))

    entries = []
    for item in traverse_obj(post, ('data', ..., {dict})):
        item_type = item.get('type')

        if item_type == 'ok_video':
            video_id = item.get('id') or post_id
            entry = {
                'id': video_id,
                'formats': self._extract_formats(item.get('playerUrls'), video_id),
            }
            entry.update(common_metadata)
            # Item-level fields (e.g. its own title) override the post-level ones
            entry.update(traverse_obj(item, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('viewsCounter', {int_or_none}),
                'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
            }, get_all=False))
            entries.append(entry)
        elif item_type == 'video' and url_or_none(item.get('url')):
            # `video` items with a URL are delegated to the YouTube extractor
            entries.append(self.url_result(item['url'], YoutubeIE))

    if not entries:
        raise ExtractorError('No videos found', expected=True)
    if len(entries) == 1:
        return entries[0]
    return self.playlist_result(entries, post_id, post_title, **common_metadata)

View file

@ -1,6 +1,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_resolution, parse_resolution,
@ -60,6 +61,7 @@ def _real_extract(self, url):
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm',
}) })
formats = [] formats = []
@ -69,6 +71,10 @@ def _real_extract(self, url):
format_url = url_or_none(format_.get('file')) format_url = url_or_none(format_.get('file'))
if not format_url: if not format_url:
continue continue
if determine_ext(format_url) == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, media_id, mpd_id='dash', fatal=False))
continue
label = format_.get('label') label = format_.get('label')
f = parse_resolution(label) f = parse_resolution(label)
f.update({ f.update({

View file

@ -67,7 +67,10 @@ def _real_extract(self, url):
html = self._download_webpage(url, video_id) html = self._download_webpage(url, video_id)
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails'] idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
if idetails.get('err_code') == 1200: err_code = idetails.get('err_code')
if err_code == 1002:
self.raise_login_required()
elif err_code == 1200:
self.raise_geo_restricted( self.raise_geo_restricted(
'This video is not available from your location due to geo restriction. ' 'This video is not available from your location due to geo restriction. '
'You may be able to bypass it by using the /details/ page instead of the /watch/ page', 'You may be able to bypass it by using the /details/ page instead of the /watch/ page',

View file

@ -33,10 +33,7 @@ def _real_extract(self, url):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
manifest = self._parse_json( manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
self._search_regex(
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
video_id)
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))

View file

@ -1,8 +1,10 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
encode_base_n,
ExtractorError, ExtractorError,
encode_base_n,
get_elements_by_class,
int_or_none, int_or_none,
join_nonempty,
merge_dicts, merge_dicts,
parse_duration, parse_duration,
str_to_int, str_to_int,
@ -81,6 +83,7 @@ def calc_hash(s):
sources = video['sources'] sources = video['sources']
formats = [] formats = []
has_av1 = bool(get_elements_by_class('download-av1', webpage))
for kind, formats_dict in sources.items(): for kind, formats_dict in sources.items():
if not isinstance(formats_dict, dict): if not isinstance(formats_dict, dict):
continue continue
@ -106,6 +109,14 @@ def calc_hash(s):
'height': height, 'height': height,
'fps': fps, 'fps': fps,
}) })
if has_av1:
formats.append({
'url': src.replace('.mp4', '-av1.mp4'),
'format_id': join_nonempty('av1', format_id),
'height': height,
'fps': fps,
'vcodec': 'av1',
})
json_ld = self._search_json_ld(webpage, display_id, default={}) json_ld = self._search_json_ld(webpage, display_id, default={})

View file

@ -1,25 +1,29 @@
from .common import InfoExtractor from .common import InfoExtractor
from .nexx import NexxIE from .nexx import NexxIE
from ..utils import (
int_or_none,
str_or_none,
)
class FunkIE(InfoExtractor): class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81', 'md5': '8610449476156f338761a75391b0017d',
'info_dict': { 'info_dict': {
'id': '1155821', 'id': '1155821',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2', 'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
'description': 'md5:a691d0413ef4835588c5b03ded670c1f', 'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
'timestamp': 1514507395, 'timestamp': 1514507395,
'upload_date': '20171229', 'upload_date': '20171229',
'duration': 426.0,
'cast': ['United Creators PMB GmbH'],
'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
'season_number': 0,
'season': 'Season 0',
'episode_number': 0,
'episode': 'Episode 0',
}, },
}, { }, {
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True, 'only_matching': True,
@ -27,18 +31,10 @@ class FunkIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id, nexx_id = self._match_valid_url(url).groups() display_id, nexx_id = self._match_valid_url(url).groups()
video = self._download_json(
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'nexx:741:' + nexx_id, 'url': f'nexx:741:{nexx_id}',
'ie_key': NexxIE.ie_key(), 'ie_key': NexxIE.ie_key(),
'id': nexx_id, 'id': nexx_id,
'title': video.get('title'),
'description': video.get('description'),
'duration': int_or_none(video.get('duration')),
'channel_id': str_or_none(video.get('channelId')),
'display_id': display_id, 'display_id': display_id,
'tags': video.get('tags'),
'thumbnail': video.get('imageUrlLandscape'),
} }

View file

@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/', 'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
'md5': 'e94de44cd80818084352fcf8de1ce82c', 'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
'info_dict': { 'info_dict': {
'id': 'g9j7Eovo', 'id': 'g9j7Eovo',
'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées', 'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
}, },
}, { }, {
'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/', 'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
'md5': '0b3f10332b812034b3a3eda1ef877c5f', 'md5': '319c662943dd777bab835cae1e2d73a5',
'info_dict': { 'info_dict': {
'id': 'LeAgybyc', 'id': 'LeAgybyc',
'title': 'Intelligence artificielle : faut-il sen méfier ?', 'title': 'Intelligence artificielle : faut-il sen méfier ?',
@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/', 'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
'md5': '3972ddf2d5f8b98699f191687258e2f9', 'md5': '6289f9489efb969e38245f31721596fe',
'info_dict': { 'info_dict': {
'id': 'QChnbPYA', 'id': 'QChnbPYA',
'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International', 'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
}, },
}, { }, {
'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/', 'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
'md5': '3ac0a0769546ee6be41ab52caea5d9a9', 'md5': 'f6df814cae53e85937621599d2967520',
'info_dict': { 'info_dict': {
'id': 'QJzqoNbf', 'id': 'QJzqoNbf',
'title': 'La philosophe Nathalie Sarthou-Lajus est linvitée du Figaro Live', 'title': 'La philosophe Nathalie Sarthou-Lajus est linvitée du Figaro Live',
@ -73,7 +73,8 @@ def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData'] player_data = self._search_nextjs_data(
webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
return self.url_result( return self.url_result(
f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'), f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),

View file

@ -28,12 +28,24 @@ class MagellanTVIE(InfoExtractor):
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'], 'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.magellantv.com/watch/celebration-nation',
'info_dict': {
'id': 'celebration-nation',
'ext': 'mp4',
'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
'duration': 2640.0,
'title': 'Ancestors',
},
'params': {'skip_download': 'm3u8'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail'] data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', 'reactContext',
(('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id) formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
return { return {

View file

@ -1,10 +1,54 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
join_nonempty,
merge_dicts,
parse_count,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class NFBIE(InfoExtractor): class NFBBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)' _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
_GEO_COUNTRIES = ['CA']
def _extract_ep_data(self, webpage, video_id, fatal=False):
return self._search_json(
r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
def _extract_ep_info(self, data, video_id, slug=None):
    """Build the info dict for a single episode from a list of episode dicts.

    :param data:     iterable of episode dicts; the first one whose
                     ``embed_url`` contains ``video_id`` is used
    :param video_id: identifier stored as ``id`` and used to select the episode
    :param slug:     optional string searched for the episode number;
                     ``video_id`` is searched instead when it is falsy
    :return: dict with ``id``, ``title`` and ``episode_number`` plus whatever
             metadata could be read from the matching episode
    """
    # With get_all=False, traverse_obj gives None when no episode matches
    # (or `data` is empty). Fall back to an empty dict so that the `**info`
    # unpacking and join_nonempty(from_dict=info) below cannot raise TypeError.
    info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
        'description': ('description', {str}),
        'thumbnail': ('thumbnail_url', {url_or_none}),
        'uploader': ('data_layer', 'episodeMaker', {str}),
        'release_year': ('data_layer', 'episodeYear', {int_or_none}),
        'episode': ('data_layer', 'episodeTitle', {str}),
        'season': ('data_layer', 'seasonTitle', {str}),
        'season_number': ('data_layer', 'seasonTitle', {parse_count}),
        'series': ('data_layer', 'seriesTitle', {str}),
    }), get_all=False) or {}

    return {
        **info,
        'id': video_id,
        'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
        # The episode number is only available from the slug, e.g. ".../episode9/" or "-episode-1-"
        'episode_number': int_or_none(self._search_regex(
            r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)', slug or video_id, 'episode number', default=None)),
    }
class NFBIE(NFBBaseIE):
IE_NAME = 'nfb'
IE_DESC = 'nfb.ca and onf.ca films and episodes'
_VALID_URL = [
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
]
_TESTS = [{ _TESTS = [{
'note': 'NFB film',
'url': 'https://www.nfb.ca/film/trafficopter/', 'url': 'https://www.nfb.ca/film/trafficopter/',
'info_dict': { 'info_dict': {
'id': 'trafficopter', 'id': 'trafficopter',
@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Barrie Howells', 'uploader': 'Barrie Howells',
'release_year': 1972, 'release_year': 1972,
'duration': 600.0,
}, },
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF film',
'url': 'https://www.onf.ca/film/mal-du-siecle/',
'info_dict': {
'id': 'mal-du-siecle',
'ext': 'mp4',
'title': 'Le mal du siècle',
'description': 'md5:1abf774d77569ebe603419f2d344102b',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Catherine Lepage',
'release_year': 2019,
'duration': 300.0,
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with English title',
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
'info_dict': {
'id': 'true-north-episode9-true-north-finale-making-it',
'ext': 'mp4',
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
'series': 'True North: Inside the Rise of Toronto Basketball',
'release_year': 2018,
'season': 'Season 1',
'season_number': 1,
'episode': 'Finale: Making It',
'episode_number': 9,
'uploader': 'Ryan Sidhoo',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with French title',
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
'info_dict': {
'id': 'direction-nord-episode-9',
'ext': 'mp4',
'title': 'Direction nord La montée du basketball à Toronto - Finale : Réussir',
'description': 'md5:349a57419b71432b97bf6083d92b029d',
'series': 'Direction nord La montée du basketball à Toronto',
'release_year': 2018,
'season': 'Saison 1',
'season_number': 1,
'episode': 'Finale : Réussir',
'episode_number': 9,
'uploader': 'Ryan Sidhoo',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with French title (needs geo-bypass)',
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
'info_dict': {
'id': 'etoile-du-nord-episode-1-lobservation',
'ext': 'mp4',
'title': 'Étoile du Nord - L\'observation',
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
'series': 'Étoile du Nord',
'release_year': 2023,
'season': 'Saison 1',
'season_number': 1,
'episode': 'L\'observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with English title (needs geo-bypass)',
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
'info_dict': {
'id': 'north-star-episode-1-observation',
'ext': 'mp4',
'title': 'North Star - Observation',
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
'series': 'North Star',
'release_year': 2023,
'season': 'Season 1',
'season_number': 1,
'episode': 'Observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
'info_dict': {
'id': 'north-star-episode-1-observation',
'ext': 'mp4',
'title': 'North Star - Observation',
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
'series': 'North Star',
'release_year': 2023,
'season': 'Season 1',
'season_number': 1,
'episode': 'Observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
'info_dict': {
'id': 'etoile-du-nord-episode-1-lobservation',
'ext': 'mp4',
'title': 'Étoile du Nord - L\'observation',
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
'series': 'Étoile du Nord',
'release_year': 2023,
'season': 'Saison 1',
'season_number': 1,
'episode': 'L\'observation',
'episode_number': 1,
'uploader': 'Patrick Bossé',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
'info_dict': {
'id': 'liste-des-choses-qui-existent-saison-2-ours',
'ext': 'mp4',
'title': 'La liste des choses qui existent - L\'ours en peluche',
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
'series': 'La liste des choses qui existent',
'release_year': 2022,
'season': 'Saison 2',
'season_number': 2,
'episode': 'L\'ours en peluche',
'episode_number': 12,
'uploader': 'Francis Papillon',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}, {
'note': 'NFB film /embed/player/ page',
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
'info_dict': {
'id': 'afterlife',
'ext': 'mp4',
'title': 'Afterlife',
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
'release_year': 1978,
'duration': 420.0,
'uploader': 'Ishu Patel',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
# Need to construct the URL since we match /embed/player/ URLs as well
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
# type_ can change from film to serie(s) after redirect; new slug may have episode number
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id) embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
video_id = self._match_id(embed_url) # embed url has unique slug
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
if 'MESSAGE_GEOBLOCKED' in player:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
iframe = self._html_search_regex( formats, subtitles = self._extract_m3u8_formats_and_subtitles(
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)', self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
webpage, 'iframe', default=None, fatal=True) video_id, 'mp4', m3u8_id='hls')
if iframe.startswith('/'):
iframe = f'https://www.nfb.ca{iframe}'
player = self._download_webpage(iframe, video_id) if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
fmts, subs = self._extract_m3u8_formats_and_subtitles(
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
for fmt in fmts:
fmt['format_note'] = 'described video'
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
source = self._html_search_regex( info = {
r'source:\s*\'([^\']+)',
player, 'source', default=None, fatal=True)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
return {
'id': video_id, 'id': video_id,
'title': self._html_search_regex( 'title': self._html_search_regex(
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>', r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
@ -45,14 +252,49 @@ def _real_extract(self, url):
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)', r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
webpage, 'description', default=None), webpage, 'description', default=None),
'thumbnail': self._html_search_regex( 'thumbnail': self._html_search_regex(
r'poster:\s*\'([^\']+)', r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
player, 'thumbnail', default=None),
'uploader': self._html_search_regex( 'uploader': self._html_search_regex(
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
webpage, 'uploader', default=None),
'release_year': int_or_none(self._html_search_regex( 'release_year': int_or_none(self._html_search_regex(
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)', r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
webpage, 'release_year', default=None)), webpage, 'release_year', default=None)),
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
return merge_dicts({
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
} }, info, self._search_json_ld(webpage, video_id, default={}))
class NFBSeriesIE(NFBBaseIE):
    # Playlist extractor for nfb.ca / onf.ca series landing pages
    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield one url_result per episode whose embed URL NFBIE accepts."""
        supported = traverse_obj(episodes, lambda _, v: NFBIE.suitable(v['embed_url']))
        for ep in supported:
            match = NFBIE._match_valid_url(ep['embed_url'])
            # match[0] is the full matched embed URL; the 'id' group is its slug
            yield self.url_result(
                match[0], NFBIE, **self._extract_ep_info([ep], match.group('id')))

    def _real_extract(self, url):
        site, type_, series_id = self._match_valid_url(url).group('site', 'type', 'id')
        # '/serie/' URLs use the 'saison' path segment, everything else uses 'season'
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        # The episode list is read from the page of the first episode of season 1
        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)

View file

@ -0,0 +1,225 @@
from .common import InfoExtractor
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NinaProtocolIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
_TESTS = [{
'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
'title': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'channel': 'ppm',
'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
'album': 'The Spatulas - March Chant',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'uploader': 'ppmrecs',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'display_id': 'the-spatulas-march-chant',
'upload_date': '20231201',
'album_artist': 'Post Present Medium ',
},
'playlist': [{
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
'title': 'March Chant In April',
'track': 'March Chant In April',
'ext': 'mp3',
'duration': 152,
'track_number': 1,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'uploader': 'ppmrecs',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'channel': 'ppm',
'album': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'upload_date': '20231201',
'album_artist': 'Post Present Medium ',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
'title': 'Rescue Mission',
'track': 'Rescue Mission',
'ext': 'mp3',
'duration': 212,
'track_number': 2,
'album_artist': 'Post Present Medium ',
'uploader': 'ppmrecs',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'channel': 'ppm',
'upload_date': '20231201',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'timestamp': 1701417610,
'album': 'The Spatulas - March Chant',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
'title': 'Slinger Style',
'track': 'Slinger Style',
'ext': 'mp3',
'duration': 179,
'track_number': 3,
'timestamp': 1701417610,
'upload_date': '20231201',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'album_artist': 'Post Present Medium ',
'album': 'The Spatulas - March Chant',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader': 'ppmrecs',
'channel': 'ppm',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
'title': 'Psychic Signal',
'track': 'Psychic Signal',
'ext': 'mp3',
'duration': 220,
'track_number': 4,
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'upload_date': '20231201',
'album': 'The Spatulas - March Chant',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'timestamp': 1701417610,
'album_artist': 'Post Present Medium ',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'channel': 'ppm',
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'uploader': 'ppmrecs',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
'title': 'Curvy Color',
'track': 'Curvy Color',
'ext': 'mp3',
'duration': 148,
'track_number': 5,
'timestamp': 1701417610,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'album': 'The Spatulas - March Chant',
'album_artist': 'Post Present Medium ',
'channel': 'ppm',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'uploader': 'ppmrecs',
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'upload_date': '20231201',
}
}, {
'info_dict': {
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
'title': 'Caveman Star',
'track': 'Caveman Star',
'ext': 'mp3',
'duration': 121,
'track_number': 6,
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
'tags': ['punk', 'postpresentmedium', 'cambridge'],
'album_artist': 'Post Present Medium ',
'uploader': 'ppmrecs',
'timestamp': 1701417610,
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
'album': 'The Spatulas - March Chant',
'channel': 'ppm',
'upload_date': '20231201',
},
}],
}, {
'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
'info_dict': {
'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
'description': 'md5:63f08d5db558b4b36e1896f317062721',
'title': 'F.G.S. - American Shield',
'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
'channel': 'tinkscough',
'tags': [],
'album_artist': 'F.G.S.',
'album': 'F.G.S. - American Shield',
'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
'display_id': 'f-g-s-american-shield',
'uploader': 'flannerysilva',
'timestamp': 1702395858,
'upload_date': '20231212',
},
'playlist_count': 1,
}, {
'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
'info_dict': {
'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
'display_id': 'time-to-figure-things-out',
'description': 'md5:960202ed01c3134bb8958f1008527e35',
'timestamp': 1706283607,
'title': 'DJ STEPDAD - time to figure things out',
'album_artist': 'DJ STEPDAD',
'uploader': 'tddvsss',
'upload_date': '20240126',
'album': 'time to figure things out',
'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
'tags': [],
},
'playlist_count': 4,
}]
def _real_extract(self, url):
    """Fetch a release from the Nina Protocol API and return it as a playlist of tracks."""
    url_id = self._match_id(url)
    api_response = self._download_json(
        f'https://api.ninaprotocol.com/v1/releases/{url_id}', url_id)
    release = api_response['release']
    # Use the release's publicKey as the id when present, else the id from the URL
    video_id = release.get('publicKey') or url_id

    # Metadata shared by the playlist itself and by every track entry
    common_info = traverse_obj(release, {
        'album': ('metadata', 'properties', 'title', {str}),
        'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
        'timestamp': ('datetime', {parse_iso8601}),
        'thumbnail': ('metadata', 'image', {url_or_none}),
        'uploader': ('publisherAccount', 'handle', {str}),
        'uploader_id': ('publisherAccount', 'publicKey', {str}),
        'channel': ('hub', 'handle', {str}),
        'channel_id': ('hub', 'publicKey', {str}),
    }, get_all=False)
    common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))

    # Only files carrying a usable URL become entries; numbering starts at 1
    tracks = traverse_obj(release, (
        'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri'])))
    entries = [{
        'id': f'{video_id}_{track_num}',
        'url': track['uri'],
        **traverse_obj(track, {
            'title': ('track_title', {str}),
            'track': ('track_title', {str}),
            'ext': ('type', {mimetype2ext}),
            'track_number': ('track', {int_or_none}),
            'duration': ('duration', {int_or_none}),
        }),
        'vcodec': 'none',
        **common_info,
    } for track_num, track in enumerate(tracks, 1)]

    release_info = traverse_obj(release, {
        'display_id': ('slug', {str}),
        'title': ('metadata', 'name', {str}),
        'description': ('metadata', 'description', {str}),
    })

    return {
        '_type': 'playlist',
        'id': video_id,
        'entries': entries,
        **release_info,
        **common_info,
    }

199
yt_dlp/extractor/nuum.py Normal file
View file

@ -0,0 +1,199 @@
import functools
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
UserNotLive,
filter_dict,
int_or_none,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class NuumBaseIE(InfoExtractor):
    # Shared API helpers for the nuum.ru extractors

    def _call_api(self, path, video_id, description, query=None):
        """Request ``https://nuum.ru/api/v2/<path>`` and return its ``result`` member.

        :param path:        API path appended to the v2 base URL
        :param video_id:    id passed to the downloader
        :param description: short label interpolated into the progress/error notes
        :param query:       optional dict of query parameters (defaults to none)
        :raises ExtractorError: when the response carries a truthy ``error`` member
        """
        # `query` defaults to None rather than a shared mutable `{}` default
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        if error := response.get('error'):
            raise ExtractorError(f'API returned error: {error!r}')
        return response['result']

    def _get_channel_info(self, channel_name):
        """Return the ``broadcasts/public`` API result for ``channel_name``."""
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })

    def _parse_video_data(self, container, extract_formats=True):
        """Convert a media container dict into an info dict.

        :param container:       media container dict; must have ``media_container_id``
        :param extract_formats: when false, no m3u8 request is made and
                                ``formats``/``subtitles`` are left out of the result
        :raises ExtractorError: when formats are requested but the container
                                holds no valid media URL
        :return: info dict with ``None``-valued entries removed by ``filter_dict``
        """
        # Only the first stream and its first media item are considered
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        # The archive URL takes precedence over the plain media URL
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        video_id = str(container['media_container_id'])
        is_live = media.get('media_status') == 'RUNNING'

        formats, subtitles = None, None
        if extract_formats:
            if not media_url:
                # Fail with a clear message instead of handing None to the m3u8 extractor
                raise ExtractorError('Unable to find a media URL', video_id=video_id)
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', live=is_live)

        return filter_dict({
            'id': video_id,
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(container, {
                'title': ('media_container_name', {str}),
                'description': ('media_container_description', {str}),
                'timestamp': ('created_at', {parse_iso8601}),
                'channel': ('media_container_channel', 'channel_name', {str}),
                'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
            }),
            **traverse_obj(stream, {
                'view_count': ('stream_total_viewers', {int_or_none}),
                'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
            }),
            **traverse_obj(media, {
                'duration': ('media_duration', {int_or_none}),
                'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
            }, get_all=False),
        })
class NuumMediaIE(NuumBaseIE):
    # Single stream / video / clip pages, addressed by a numeric id
    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [{
        'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
        'only_matching': True,
    }, {
        'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
        'md5': 'f1d9118a30403e32b702a204eb03aca3',
        'info_dict': {
            'id': '1567547',
            'ext': 'mp4',
            'title': 'Toxi$ - Hurtz',
            'description': '',
            'timestamp': 1702631651,
            'upload_date': '20231215',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '6911',
            'channel': 'toxis',
            'duration': 116,
        },
    }, {
        'url': 'https://nuum.ru/clips/1552564-pro-misu',
        'md5': 'b248ae1565b1e55433188f11beeb0ca1',
        'info_dict': {
            'id': '1552564',
            'ext': 'mp4',
            'title': 'Про Мису 🙃',
            'timestamp': 1701971828,
            'upload_date': '20231207',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
            'concurrent_view_count': int,
            'channel_id': '3320',
            'channel': 'Misalelik',
            'duration': 41,
        },
    }]

    def _real_extract(self, url):
        """Look up the media container for the numeric id in ``url`` and parse it."""
        media_id = self._match_id(url)
        container = self._call_api(f'media-containers/{media_id}', media_id, 'media')
        return self._parse_video_data(container)
class NuumLiveIE(NuumBaseIE):
    # Channel root pages: resolves to the channel's current media container
    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/mts_live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        channel_info = self._get_channel_info(channel_name)

        # Only an explicit False counts as "not live"; a missing flag falls through
        live_flag = traverse_obj(channel_info, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel_name)

        stream_info = self._parse_video_data(channel_info['media_container'])
        # Present the result as if it came from the media extractor's stream page;
        # keys in the parsed info are applied last and therefore take precedence
        result = {
            'webpage_url': f'https://nuum.ru/streams/{stream_info["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        result.update(stream_info)
        return result
class NuumTabIE(NuumBaseIE):
    # Playlist extractor for the streams/videos/clips tab of a channel.
    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [{
        'url': 'https://nuum.ru/channel/dankon_/clips',
        'info_dict': {
            'id': 'dankon__clips',
            'title': 'Dankon_',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/videos',
        'info_dict': {
            'id': 'dankon__videos',
            'title': 'Dankon_',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nuum.ru/channel/dankon_/streams',
        'info_dict': {
            'id': 'dankon__streams',
            'title': 'Dankon_',
        },
        'playlist_mincount': 1,
    }]

    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        # Generator producing one url_result per media container on the
        # requested (zero-based) page of the tab.
        container_types_by_tab = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }

        api_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': container_types_by_tab[tab_type],
        }
        page_data = self._call_api(
            'media-containers', video_id=tab_id,
            description=f'{tab_type} tab page {page + 1}', query=api_query)

        # Non-dict items in the response are ignored.
        for item in traverse_obj(page_data, (..., {dict})):
            # Formats are skipped here; the entry is resolved through its URL.
            entry_info = self._parse_video_data(item, extract_formats=False)
            entry_url = f'https://nuum.ru/videos/{entry_info["id"]}'
            yield self.url_result(entry_url, NuumMediaIE, **entry_info)

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        channel_name = mobj.group('id')
        tab_type = mobj.group('type')
        tab_id = f'{channel_name}_{tab_type}'

        channel_data = self._get_channel_info(channel_name)['channel']

        # Pages are fetched lazily, `_PAGE_SIZE` entries at a time.
        page_func = functools.partial(
            self._fetch_page, channel_data['channel_id'], tab_type, tab_id)
        entries = OnDemandPagedList(page_func, self._PAGE_SIZE)

        return self.playlist_result(
            entries, playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))

View file

@ -1,50 +1,93 @@
import hmac import json
import hashlib import uuid
import base64
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError,
clean_html,
determine_ext, determine_ext,
extract_attributes,
float_or_none, float_or_none,
get_elements_html_by_class,
int_or_none, int_or_none,
js_to_json, merge_dicts,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
remove_end,
remove_start, remove_start,
str_or_none,
traverse_obj,
url_or_none,
) )
class NYTimesBaseIE(InfoExtractor): class NYTimesBaseIE(InfoExtractor):
_SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v' _DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d')
_TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
_GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2'
_GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
video(id: $id) {
... on Video {
bylines {
renderedRepresentation
}
duration
firstPublished
promotionalHeadline
promotionalMedia {
... on Image {
crops {
name
renditions {
name
width
height
url
}
}
}
}
renditions {
type
width
height
url
bitrate
}
summary
}
}
}'''
def _extract_video_from_id(self, video_id): def _call_api(self, media_id):
# Authorization generation algorithm is reverse engineered from `signer` in # reference: `id-to-uri.js`
# http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video')
path = '/svc/video/api/v3/video/' + video_id media_uuid = uuid.uuid5(video_uuid, media_id)
hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
'X-NYTV': 'vhs',
}, fatal=False)
if not video_data:
video_data = self._download_json(
'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
video_id, 'Downloading video JSON')
title = video_data['headline'] return traverse_obj(self._download_json(
self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({
'query': self._GRAPHQL_QUERY,
'variables': {'id': f'nyt://video/{media_uuid}'},
}, separators=(',', ':')).encode(), headers={
'Content-Type': 'application/json',
'Nyt-App-Type': 'vhs',
'Nyt-App-Version': 'v3.52.21',
'Nyt-Token': self._TOKEN,
'Origin': 'https://nytimes.com',
}, fatal=False), ('data', 'video', {dict})) or {}
def get_file_size(file_size): def _extract_thumbnails(self, thumbs):
if isinstance(file_size, int): return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), {
return file_size 'url': 'url',
elif isinstance(file_size, dict): 'width': ('width', {int_or_none}),
return int(file_size.get('value', 0)) 'height': ('height', {int_or_none}),
else: }), default=None)
return None
def _extract_formats_and_subtitles(self, video_id, content_media_json):
urls = [] urls = []
formats = [] formats = []
subtitles = {} subtitles = {}
for video in video_data.get('renditions', []): for video in traverse_obj(content_media_json, ('renditions', ..., {dict})):
video_url = video.get('url') video_url = video.get('url')
format_id = video.get('type') format_id = video.get('type')
if not video_url or format_id == 'thumbs' or video_url in urls: if not video_url or format_id == 'thumbs' or video_url in urls:
@ -56,11 +99,9 @@ def get_file_size(file_size):
video_url, video_id, 'mp4', 'm3u8_native', video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id or 'hls', fatal=False) m3u8_id=format_id or 'hls', fatal=False)
formats.extend(m3u8_fmts) formats.extend(m3u8_fmts)
subtitles = self._merge_subtitles(subtitles, m3u8_subs) self._merge_subtitles(m3u8_subs, target=subtitles)
elif ext == 'mpd': elif ext == 'mpd':
continue continue # all mpd urls give 404 errors
# formats.extend(self._extract_mpd_formats(
# video_url, video_id, format_id or 'dash', fatal=False))
else: else:
formats.append({ formats.append({
'url': video_url, 'url': video_url,
@ -68,55 +109,50 @@ def get_file_size(file_size):
'vcodec': video.get('videoencoding') or video.get('video_codec'), 'vcodec': video.get('videoencoding') or video.get('video_codec'),
'width': int_or_none(video.get('width')), 'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')), 'height': int_or_none(video.get('height')),
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')), 'filesize': traverse_obj(video, (
('file_size', 'fileSize'), (None, ('value')), {int_or_none}), get_all=False),
'tbr': int_or_none(video.get('bitrate'), 1000) or None, 'tbr': int_or_none(video.get('bitrate'), 1000) or None,
'ext': ext, 'ext': ext,
}) })
thumbnails = [] return formats, subtitles
for image in video_data.get('images', []):
image_url = image.get('url')
if not image_url:
continue
thumbnails.append({
'url': 'http://www.nytimes.com/' + image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
})
publication_date = video_data.get('publication_date') def _extract_video(self, media_id):
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None data = self._call_api(media_id)
formats, subtitles = self._extract_formats_and_subtitles(media_id, data)
return { return {
'id': video_id, 'id': media_id,
'title': title, 'title': data.get('promotionalHeadline'),
'description': video_data.get('summary'), 'description': data.get('summary'),
'timestamp': timestamp, 'timestamp': parse_iso8601(data.get('firstPublished')),
'uploader': video_data.get('byline'), 'duration': float_or_none(data.get('duration'), scale=1000),
'duration': float_or_none(video_data.get('duration'), 1000), 'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators'
'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': self._extract_thumbnails(
traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
} }
class NYTimesIE(NYTimesBaseIE): class NYTimesIE(NYTimesBaseIE):
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)' _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>'] _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
_TESTS = [{ _TESTS = [{
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263', 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
'md5': 'd665342765db043f7e225cff19df0f2d', 'md5': 'a553aa344014e3723d33893d89d4defc',
'info_dict': { 'info_dict': {
'id': '100000002847155', 'id': '100000002847155',
'ext': 'mov', 'ext': 'mp4',
'title': 'Verbatim: What Is a Photocopier?', 'title': 'Verbatim: What Is a Photocopier?',
'description': 'md5:93603dada88ddbda9395632fdc5da260', 'description': 'md5:93603dada88ddbda9395632fdc5da260',
'timestamp': 1398631707, 'timestamp': 1398646132,
'upload_date': '20140427', 'upload_date': '20140428',
'uploader': 'Brett Weiner', 'creator': 'Brett Weiner',
'thumbnail': r're:https?://\w+\.nyt.com/images/.+\.jpg',
'duration': 419, 'duration': 419,
} },
}, { }, {
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html', 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
'only_matching': True, 'only_matching': True,
@ -125,138 +161,260 @@ class NYTimesIE(NYTimesBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
return self._extract_video_from_id(video_id) return self._extract_video(video_id)
class NYTimesArticleIE(NYTimesBaseIE): class NYTimesArticleIE(NYTimesBaseIE):
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?' _VALID_URL = r'https?://(?:www\.)?nytimes\.com/\d{4}/\d{2}/\d{2}/(?!books|podcasts)[^/?#]+/(?:\w+/)?(?P<id>[^./?#]+)(?:\.html)?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0', 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
'md5': 'e2076d58b4da18e6a001d53fd56db3c9', 'md5': '3eb5ddb1d6f86254fe4f233826778737',
'info_dict': { 'info_dict': {
'id': '100000003628438', 'id': '100000003628438',
'ext': 'mov', 'ext': 'mp4',
'title': 'New Minimum Wage: $70,000 a Year', 'title': 'One Companys New Minimum Wage: $70,000 a Year',
'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.', 'description': 'md5:89ba9ab67ca767bb92bf823d1f138433',
'timestamp': 1429033037, 'timestamp': 1429047468,
'upload_date': '20150414', 'upload_date': '20150414',
'uploader': 'Matthew Williams', 'uploader': 'Matthew Williams',
} 'creator': 'Patricia Cohen',
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 119.0,
},
}, { }, {
'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html', # article with audio and no video
'md5': 'e0d52040cafb07662acf3c9132db3575', 'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
'md5': '2365b3555c8aa7f4dd34ca735ad02e6a',
'info_dict': { 'info_dict': {
'id': '100000004709062', 'id': '100000009110381',
'title': 'The Run-Up: He Was Like an Octopus',
'ext': 'mp3', 'ext': 'mp3',
'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4', 'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
'series': 'The Run-Up', 'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
'episode': 'He Was Like an Octopus', 'timestamp': 1695960700,
'episode_number': 20, 'upload_date': '20230929',
'duration': 2130, 'creator': 'Stephanie Nolen, Natalija Gormalova',
} 'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 1322,
},
}, { }, {
'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html', 'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
'md5': '3eb5ddb1d6f86254fe4f233826778737',
'info_dict': { 'info_dict': {
'id': '100000004709479', 'id': '100000009202270',
'title': 'The Rise of Hitler', 'ext': 'mp4',
'ext': 'mp3', 'title': 'Kamala Harris Defends Biden Policies, but Says More Work Needed to Reach Voters',
'description': 'md5:bce877fd9e3444990cb141875fab0028', 'description': 'md5:de4212a7e19bb89e4fb14210ca915f1f',
'creator': 'Pamela Paul', 'timestamp': 1701290997,
'duration': 3475, 'upload_date': '20231129',
'uploader': 'By The New York Times',
'creator': 'Katie Rogers',
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
'duration': 97.631,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': 'm3u8',
}, },
}, { }, {
'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1', # multiple videos in the same article
'url': 'https://www.nytimes.com/2023/12/02/business/air-traffic-controllers-safety.html',
'info_dict': {
'id': 'air-traffic-controllers-safety',
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
'upload_date': '20231202',
'creator': 'Emily Steel, Sydney Ember',
'timestamp': 1701511264,
},
'playlist_count': 3,
}, {
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
'only_matching': True, 'only_matching': True,
}] }]
def _extract_podcast_from_json(self, json, page_id, webpage): def _extract_content_from_block(self, block):
podcast_audio = self._parse_json( details = traverse_obj(block, {
json, page_id, transform_source=js_to_json) 'id': ('sourceId', {str}),
'uploader': ('bylines', ..., 'renderedRepresentation', {str}),
'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))),
'timestamp': ('firstPublished', {parse_iso8601}),
'series': ('podcastSeries', {str}),
}, get_all=False)
audio_data = podcast_audio['data'] formats, subtitles = self._extract_formats_and_subtitles(details.get('id'), block)
track = audio_data['track'] # audio articles will have an url and no formats
url = traverse_obj(block, ('fileUrl', {url_or_none}))
episode_title = track['title'] if not formats and url:
video_url = track['source'] formats.append({'url': url, 'vcodec': 'none'})
description = track.get('description') or self._html_search_meta(
['og:description', 'twitter:description'], webpage)
podcast_title = audio_data.get('podcast', {}).get('title')
title = ('%s: %s' % (podcast_title, episode_title)
if podcast_title else episode_title)
episode = audio_data.get('podcast', {}).get('episode') or ''
episode_number = int_or_none(self._search_regex(
r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
return { return {
'id': remove_start(podcast_audio.get('target'), 'FT') or page_id, **details,
'url': video_url, 'thumbnails': self._extract_thumbnails(traverse_obj(
'title': title, block, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
'description': description, 'formats': formats,
'creator': track.get('credit'), 'subtitles': subtitles
'series': podcast_title,
'episode': episode_title,
'episode_number': episode_number,
'duration': int_or_none(track.get('duration')),
} }
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
art_json = self._search_json(
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
video_id = self._search_regex( blocks = traverse_obj(art_json, (
r'data-videoid=["\'](\d+)', webpage, 'video id', 'sprinkledBody', 'content', ..., ('ledeMedia', None),
default=None, fatal=False) lambda _, v: v['__typename'] in ('Video', 'Audio')))
if video_id is not None: if not blocks:
return self._extract_video_from_id(video_id) raise ExtractorError('Unable to extract any media blocks from webpage')
podcast_data = self._search_regex( common_info = {
(r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script', 'title': remove_end(self._html_extract_title(webpage), ' - The New York Times'),
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'), 'description': traverse_obj(art_json, (
webpage, 'podcast data') 'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
return self._extract_podcast_from_json(podcast_data, page_id, webpage) get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
'creator': ', '.join(
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
'thumbnails': self._extract_thumbnails(traverse_obj(
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
}
entries = []
for block in blocks:
entries.append(merge_dicts(self._extract_content_from_block(block), common_info))
if len(entries) > 1:
return self.playlist_result(entries, page_id, **common_info)
return {
'id': page_id,
**entries[0],
}
class NYTimesCookingIE(NYTimesBaseIE): class NYTimesCookingIE(NYTimesBaseIE):
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)' IE_NAME = 'NYTimesCookingGuide'
_VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart', 'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
'info_dict': { 'info_dict': {
'id': '100000004756089', 'id': '13-how-to-cook-a-turkey',
'ext': 'mov', 'title': 'How to Cook a Turkey',
'timestamp': 1479383008, 'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON', },
'title': 'Cranberry Tart', 'playlist_count': 2,
'upload_date': '20161117', }, {
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.', # single video example
'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
'md5': '64415805fe0b8640fce6b0b9def5989a',
'info_dict': {
'id': '100000005835845',
'ext': 'mp4',
'title': 'How to Make Mac and Cheese',
'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
'timestamp': 1522950315,
'upload_date': '20180405',
'duration': 9.51,
'creator': 'Alison Roman',
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
}, },
}, { }, {
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey', 'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
'md5': '4b2e8c70530a89b8d905a2b572316eb8', 'md5': '64415805fe0b8640fce6b0b9def5989a',
'info_dict': { 'info_dict': {
'id': '100000003951728', 'id': '20-how-to-frost-a-cake',
'ext': 'mov', 'title': 'How to Frost a Cake',
'timestamp': 1445509539, 'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
'description': 'Turkey guide', },
'upload_date': '20151022', 'playlist_count': 8,
'title': 'Turkey',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
video_id = self._search_regex( lead_video_id = self._search_regex(
r'data-video-id=["\'](\d+)', webpage, 'video id') r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
media_ids = traverse_obj(
get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
return self._extract_video_from_id(video_id) if media_ids:
media_ids.append(lead_video_id)
return self.playlist_result(
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
return {
**self._extract_video(lead_video_id),
'title': title,
'description': description,
'creator': self._search_regex( # TODO: change to 'creators'
r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
}
class NYTimesCookingRecipeIE(InfoExtractor):
    # Extractor for recipe pages on cooking.nytimes.com; the recipe metadata
    # and the HLS manifest URL are read from the page's Next.js data.
    _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
        'md5': '579e83bbe8e61e9de67f80edba8a78a8',
        'info_dict': {
            'id': '1017817',
            'ext': 'mp4',
            'title': 'Cranberry Curd Tart',
            'description': 'md5:ad77a3fc321db636256d4343c5742152',
            'timestamp': 1447804800,
            'upload_date': '20151118',
            'creator': 'David Tanis',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }, {
        'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies',
        'md5': '58df35998241dcf0620e99e646331b42',
        'info_dict': {
            'id': '1024781',
            'ext': 'mp4',
            'title': 'Neapolitan Checkerboard Cookies',
            'description': 'md5:ba12394c585ababea951cb6d2fcc6631',
            'timestamp': 1701302400,
            'upload_date': '20231130',
            'creator': 'Sue Li',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }, {
        'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats',
        'md5': '2fe7965a3adc899913b8e25ada360823',
        'info_dict': {
            'id': '1019516',
            'ext': 'mp4',
            'timestamp': 1546387200,
            'description': 'md5:8856ce10239161bd2596ac335b9f9bfb',
            'upload_date': '20190102',
            'title': 'Overnight Oats',
            'creator': 'Genevieve Ko',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        nextjs_data = self._search_nextjs_data(webpage, page_id)
        recipe = nextjs_data['props']['pageProps']['recipe']

        # `videoSrc` is required: a recipe without it raises KeyError here.
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            recipe['videoSrc'], page_id, 'mp4', m3u8_id='hls')

        metadata = traverse_obj(recipe, {
            'id': ('id', {str_or_none}),
            'title': ('title', {str}),
            'description': ('topnote', {clean_html}),
            'timestamp': ('publishedAt', {int_or_none}),
            'creator': ('contentAttribution', 'cardByline', {str}),
        })

        # One thumbnail per valid URL under image -> crops -> recipe.
        thumbnails = []
        for thumb_url in traverse_obj(recipe, ('image', 'crops', 'recipe', ..., {url_or_none})):
            thumbnails.append({'url': thumb_url})

        return {
            **metadata,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }

View file

@ -1,3 +1,4 @@
import base64
import functools import functools
import re import re
@ -565,3 +566,66 @@ def _real_extract(self, url):
}) })
return self.playlist_result(entries) return self.playlist_result(entries)
class ORFONIE(InfoExtractor):
    # Extractor for on.orf.at video pages.
    IE_NAME = 'orf:on'
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
    _TESTS = [{
        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
        'info_dict': {
            'id': '14210000',
            'ext': 'mp4',
            'duration': 2651.08,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
            'timestamp': 1706472362,
            'upload_date': '20240128',
        }
    }]

    def _extract_video(self, video_id, display_id):
        # The API addresses episodes by the base64 encoding of a fixed prefix
        # followed by the numeric video ID.
        plain_id = f'3dSlfek03nsLKdj4Jsd{video_id}'
        encrypted_id = base64.b64encode(plain_id.encode()).decode()
        episode = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)

        formats = []
        subtitles = {}
        for manifest_type in traverse_obj(episode, ('sources', {dict.keys}, ...)):
            is_hls = manifest_type == 'hls'
            is_dash = manifest_type == 'dash'
            # Source types other than HLS and DASH are ignored.
            if not (is_hls or is_dash):
                continue

            manifest_urls = traverse_obj(
                episode, ('sources', manifest_type, ..., 'src', {url_or_none}))
            for manifest_url in manifest_urls:
                if is_hls:
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        manifest_url, display_id, fatal=False, m3u8_id='hls')
                else:
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        manifest_url, display_id, fatal=False, mpd_id='dash')
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        # For title and description the first available of the listed keys wins.
        api_metadata = traverse_obj(episode, {
            'duration': ('duration_second', {float_or_none}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
        }, get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **api_metadata,
        }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id, display_id = mobj.group('id'), mobj.group('slug')
        webpage = self._download_webpage(url, display_id)

        # Precedence, lowest to highest: HTML meta tags, JSON-LD, API data.
        info = {
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
            'description': self._html_search_meta(
                ['description', 'og:description', 'twitter:description'], webpage, default=None),
        }
        info.update(self._search_json_ld(webpage, display_id, fatal=False))
        info.update(self._extract_video(video_id, display_id))
        return info

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,8 @@
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_iso8601, traverse_obj, try_call from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
from ..utils.traversal import traverse_obj
class PrankCastIE(InfoExtractor): class PrankCastIE(InfoExtractor):
@ -64,3 +67,71 @@ def _real_extract(self, url):
'categories': [json_info.get('broadcast_category')], 'categories': [json_info.get('broadcast_category')],
'tags': try_call(lambda: json_info['broadcast_tags'].split(',')) 'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
} }
class PrankCastPostIE(InfoExtractor):
    # Extractor for prankcast.com post pages; all metadata is read from the
    # page's Next.js data (`ssr_data_posts`).
    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
        'info_dict': {
            'id': '6214',
            'ext': 'mp3',
            'title': 'Happy National Rachel Day!',
            'display_id': 'happy-national-rachel-day-',
            'timestamp': 1704333938,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 13175,
            'cast': ['Devonanustart'],
            'description': '',
            'categories': ['prank call'],
            'upload_date': '20240104'
        }
    }, {
        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
        'info_dict': {
            'id': '6217',
            'ext': 'mp3',
            'title': 'Jake the Work Crow!',
            'display_id': 'jake-the-work-crow-',
            'timestamp': 1704346592,
            'uploader': 'despicabledogs',
            'channel_id': '957',
            'duration': 263.287,
            'cast': ['despicabledogs'],
            'description': 'https://imgur.com/a/vtxLvKU',
            'categories': [],
            'upload_date': '20240104'
        }
    }]

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
        # `post_contents_json` is a JSON-encoded list; only its first item is used.
        content = self._parse_json(post['post_contents_json'], video_id)[0]

        uploader = post.get('user_name')
        # `guests_json` is itself a JSON string; anything that does not decode
        # to a dict is treated as "no guests".
        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}

        return {
            'id': video_id,
            'title': post.get('post_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': content.get('url'),
            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
            'uploader': uploader,
            'channel_id': str_or_none(post.get('user_id')),
            'duration': float_or_none(content.get('duration')),
            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
            'description': post.get('post_body'),
            'categories': list(filter(None, [content.get('category')])),
            # filter(None, ...) drops empty tags. The previous predicate was the
            # string '' which is not callable, so the resulting TypeError made
            # this field evaluate to None for every post.
            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
            # The chat log is only offered when the post references a broadcast.
            'subtitles': {
                'live_chat': [{
                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
                    'ext': 'json',
                }],
            } if post.get('content_id') else None
        }

View file

@ -1,5 +1,6 @@
import base64 import base64
import random import random
import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
@ -11,6 +12,7 @@
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
) )
from ..utils.traversal import traverse_obj
class RadikoBaseIE(InfoExtractor): class RadikoBaseIE(InfoExtractor):
@ -159,6 +161,12 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token,
return formats return formats
def _extract_performers(self, prog):
    """Return the programme's performers as one comma-separated string.

    The text of every `pfm` element of `prog` is split on the separator
    characters in the regex below and each piece is stripped of surrounding
    whitespace. Returns None when no performer name is found.
    """
    performers = traverse_obj(prog, (
        'pfm/text()', ..., {lambda x: re.split(r'[//、 ,]', x)}, ..., {str.strip}))
    # Adjacent separators (e.g. a comma followed by a space) make re.split
    # produce empty pieces; drop them so the result has no ", , " artefacts.
    # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
    return ', '.join(filter(None, performers)) or None
class RadikoIE(RadikoBaseIE): class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
@ -186,10 +194,12 @@ def _real_extract(self, url):
return { return {
'id': video_id, 'id': video_id,
'title': try_call(lambda: prog.find('title').text), 'title': try_call(lambda: prog.find('title').text),
'artist': self._extract_performers(prog),
'description': clean_html(try_call(lambda: prog.find('info').text)), 'description': clean_html(try_call(lambda: prog.find('info').text)),
'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader': try_call(lambda: station_program.find('.//name').text),
'uploader_id': station, 'uploader_id': station,
'timestamp': vid_int, 'timestamp': vid_int,
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
'is_live': True, 'is_live': True,
'formats': self._extract_formats( 'formats': self._extract_formats(
video_id=video_id, station=station, is_onair=False, video_id=video_id, station=station, is_onair=False,
@ -243,6 +253,7 @@ def _real_extract(self, url):
return { return {
'id': station, 'id': station,
'title': title, 'title': title,
'artist': self._extract_performers(prog),
'description': description, 'description': description,
'uploader': station_name, 'uploader': station_name,
'uploader_id': station, 'uploader_id': station,

View file

@ -12,7 +12,7 @@
class RedTubeIE(InfoExtractor): class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
_TESTS = [{ _TESTS = [{
'url': 'https://www.redtube.com/38864951', 'url': 'https://www.redtube.com/38864951',
@ -35,6 +35,9 @@ class RedTubeIE(InfoExtractor):
}, { }, {
'url': 'http://it.redtube.com/66418', 'url': 'http://it.redtube.com/66418',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.redtube.com.br/103224331',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -21,7 +21,7 @@
class TVPIE(InfoExtractor): class TVPIE(InfoExtractor):
IE_NAME = 'tvp' IE_NAME = 'tvp'
IE_DESC = 'Telewizja Polska' IE_DESC = 'Telewizja Polska'
_VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)' _VALID_URL = r'https?://(?:[^/]+\.)?(?:tvp(?:parlament)?\.(?:pl|info)|tvpworld\.com|swipeto\.pl)/(?:(?!\d+/)[^/]+/)*(?P<id>\d+)(?:[/?#]|$)'
_TESTS = [{ _TESTS = [{
# TVPlayer 2 in js wrapper # TVPlayer 2 in js wrapper
@ -514,7 +514,7 @@ def _parse_video(self, video, with_url=True):
class TVPVODVideoIE(TVPVODBaseIE): class TVPVODVideoIE(TVPVODBaseIE):
IE_NAME = 'tvp:vod' IE_NAME = 'tvp:vod'
_VALID_URL = r'https?://vod\.tvp\.pl/[a-z\d-]+,\d+/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)(?:\?[^#]+)?(?:#.+)?$' _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357', 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@ -560,12 +560,23 @@ class TVPVODVideoIE(TVPVODBaseIE):
'thumbnail': 're:https?://.+', 'thumbnail': 're:https?://.+',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://vod.tvp.pl/live,1/tvp-world,399731',
'info_dict': {
'id': '399731',
'ext': 'mp4',
'title': r're:TVP WORLD \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
'live_status': 'is_live',
'thumbnail': 're:https?://.+',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) category, video_id = self._match_valid_url(url).group('category', 'id')
info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False) is_live = category == 'live,1'
entity = 'lives' if is_live else 'vods'
info_dict = self._parse_video(self._call_api(f'{entity}/{video_id}', video_id), with_url=False)
playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'}) playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
@ -582,6 +593,8 @@ def _real_extract(self, url):
'ext': 'ttml', 'ext': 'ttml',
}) })
info_dict['is_live'] = is_live
return info_dict return info_dict

View file

@ -269,7 +269,7 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None):
'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
if not jwt_response.get('jwt'): if not jwt_response.get('jwt'):
return return
headers = {'Authorization': 'jwt %s' % jwt_response['jwt']} headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
original_response = self._download_json( original_response = self._download_json(
f'https://api.vimeo.com/videos/{video_id}', video_id, f'https://api.vimeo.com/videos/{video_id}', video_id,
headers=headers, fatal=False, expected_status=(403, 404)) or {} headers=headers, fatal=False, expected_status=(403, 404)) or {}
@ -751,6 +751,7 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
video = self._download_json( video = self._download_json(
api_url, video_id, headers={ api_url, video_id, headers={
'Authorization': 'jwt ' + token, 'Authorization': 'jwt ' + token,
'Accept': 'application/json',
}, query={ }, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
}) })
@ -785,7 +786,7 @@ def _try_album_password(self, url):
jwt = viewer['jwt'] jwt = viewer['jwt']
album = self._download_json( album = self._download_json(
'https://api.vimeo.com/albums/' + album_id, 'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt}, album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'}) query={'fields': 'description,name,privacy'})
if try_get(album, lambda x: x['privacy']['view']) == 'password': if try_get(album, lambda x: x['privacy']['view']) == 'password':
password = self.get_param('videopassword') password = self.get_param('videopassword')
@ -1147,10 +1148,12 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page):
'https://api.vimeo.com/albums/%s/videos' % album_id, 'https://api.vimeo.com/albums/%s/videos' % album_id,
album_id, 'Downloading page %d' % api_page, query=query, headers={ album_id, 'Downloading page %d' % api_page, query=query, headers={
'Authorization': 'jwt ' + authorization, 'Authorization': 'jwt ' + authorization,
'Accept': 'application/json',
})['data'] })['data']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
return return
raise
for video in videos: for video in videos:
link = video.get('link') link = video.get('link')
if not link: if not link:
@ -1171,7 +1174,7 @@ def _real_extract(self, url):
jwt = viewer['jwt'] jwt = viewer['jwt']
album = self._download_json( album = self._download_json(
'https://api.vimeo.com/albums/' + album_id, 'https://api.vimeo.com/albums/' + album_id,
album_id, headers={'Authorization': 'jwt ' + jwt}, album_id, headers={'Authorization': 'jwt ' + jwt, 'Accept': 'application/json'},
query={'fields': 'description,name,privacy'}) query={'fields': 'description,name,privacy'})
hashed_pass = None hashed_pass = None
if try_get(album, lambda x: x['privacy']['view']) == 'password': if try_get(album, lambda x: x['privacy']['view']) == 'password':

View file

@ -1,159 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
traverse_obj,
try_get,
)
class WASDTVBaseIE(InfoExtractor):
    """Shared API access and info-dict assembly for the wasd.tv extractors.

    Subclasses provide `_get_container` and `_get_media_url`; the
    `_real_extract` defined here turns the returned media container into
    an info dict.
    """

    def _fetch(self, path, video_id, description, query=None):
        """Download `https://wasd.tv/api/{path}` and return its `result` field.

        `description` is only used to build the progress and error notes.
        Raises an expected ExtractorError when the response has an `error` field.
        """
        response = self._download_json(
            f'https://wasd.tv/api/{path}', video_id,
            # Build a fresh dict per call rather than sharing a mutable default argument
            query={} if query is None else query,
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        error = response.get('error')
        if error:
            raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
        return response.get('result')

    def _extract_thumbnails(self, thumbnails_dict):
        """Build a thumbnails list from the `small`/`medium`/`large` entries.

        The position in the traversal result is used as `preference`.
        """
        return [{
            'url': url,
            'preference': index,
        } for index, url in enumerate(
            traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]

    def _real_extract(self, url):
        """Extract the first media item of the first stream in the container."""
        container = self._get_container(url)
        stream = traverse_obj(container, ('media_container_streams', 0))
        media = try_get(stream, lambda x: x['stream_media'][0])
        if not media:
            raise ExtractorError('Can not extract media data.', expected=True)
        media_meta = media.get('media_meta')
        if not media_meta:
            # Without this guard the subclass hooks and the thumbnail lookup below
            # would fail with TypeError/AttributeError on a missing `media_meta`
            raise ExtractorError('Can not extract media data.', expected=True)
        media_url, is_live = self._get_media_url(media_meta)
        video_id = media.get('media_id') or container.get('media_container_id')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
        return {
            'id': str(video_id),
            # The page is only downloaded when the container carries no name
            'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
            'description': container.get('media_container_description'),
            'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images')),
            'timestamp': parse_iso8601(container.get('created_at')),
            # Live streams report current viewers, everything else the total
            'view_count': int_or_none(stream.get('stream_current_viewers' if is_live else 'stream_total_viewers')),
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_container(self, url):
        """Return the media container for `url`; must be overridden."""
        raise NotImplementedError('Subclass for get media container')

    def _get_media_url(self, media_meta):
        """Return a `(media_url, is_live)` tuple; must be overridden."""
        raise NotImplementedError('Subclass for get media url')
class WASDTVStreamIE(WASDTVBaseIE):
    """Extractor for the stream currently running on a wasd.tv channel page."""

    IE_NAME = 'wasdtv:stream'
    _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
    _TESTS = [{
        'url': 'https://wasd.tv/24_7',
        'info_dict': {
            'id': '559738',
            'ext': 'mp4',
            'title': 'Live 24/7 Music',
            'description': '24&#x2F;7 Music',
            'timestamp': int,
            'upload_date': r're:^\d{8}$',
            'is_live': True,
            'view_count': int,
        },
    }]

    def _get_container(self, url):
        """Resolve the channel nickname in `url` and return its first running container."""
        channel_name = self._match_id(url)
        channel_info = self._fetch(
            f'channels/nicknames/{channel_name}', video_id=channel_name, description='channel')
        channel_id = channel_info.get('channel_id')
        running_query = {
            'channel_id': channel_id,
            'media_container_type': 'SINGLE',
            'media_container_status': 'RUNNING',
        }
        running = self._fetch(
            'v2/media-containers', channel_id, 'running media containers', query=running_query)
        if running:
            return running[0]
        # No running container was returned for this channel
        raise ExtractorError(f'{channel_name} is offline', expected=True)

    def _get_media_url(self, media_meta):
        """Return the stream URL, always flagged as live."""
        live_url = media_meta['media_url']
        return live_url, True
class WASDTVRecordIE(WASDTVBaseIE):
    """Extractor for wasd.tv URLs that carry a `record=<id>` query parameter."""

    IE_NAME = 'wasdtv:record'
    _VALID_URL = r'https?://wasd\.tv/[^/#?]+(?:/videos)?\?record=(?P<id>\d+)$'
    _TESTS = [{
        'url': 'https://wasd.tv/spacemita/videos?record=907755',
        'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
        'info_dict': {
            'id': '906825',
            'ext': 'mp4',
            'title': 'Музыкальный',
            'description': 'md5:f510388d929ff60ae61d4c3cab3137cc',
            'timestamp': 1645812079,
            'upload_date': '20220225',
            'thumbnail': r're:^https?://.+\.jpg',
            'is_live': False,
            'view_count': int,
        },
    }, {
        'url': 'https://wasd.tv/spacemita?record=907755',
        'only_matching': True,
    }]

    def _get_container(self, url):
        """Fetch the media container whose id is the `record` value in `url`."""
        record_id = self._match_id(url)
        api_path = f'v2/media-containers/{record_id}'
        return self._fetch(api_path, record_id, 'media container')

    def _get_media_url(self, media_meta):
        """Return the archive URL (not live) when present, else the media URL (live)."""
        archive_url = media_meta.get('media_archive_url')
        if not archive_url:
            # No archive URL in the metadata: use `media_url` and flag it as live
            return media_meta['media_url'], True
        return archive_url, False
class WASDTVClipIE(WASDTVBaseIE):
    """Extractor for wasd.tv clip URLs (`/clips?clip=<id>`)."""

    IE_NAME = 'wasdtv:clip'
    _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
    _TESTS = [{
        'url': 'https://wasd.tv/spacemita/clips?clip=26804',
        'md5': '818885e720143d7a4e776ff66fcff148',
        'info_dict': {
            'id': '26804',
            'ext': 'mp4',
            'title': 'Пуш флексит на голове стримера',
            'timestamp': 1646682908,
            'upload_date': '20220307',
            'thumbnail': r're:^https?://.+\.jpg',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch the clip metadata from the API and build its info dict.

        Raises an expected ExtractorError when the clip has no media URL.
        """
        clip_id = self._match_id(url)
        clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip')
        # `clip_data` may be absent; fall back to an empty dict so the lookups
        # below do not fail with AttributeError on None
        clip_data = clip.get('clip_data') or {}
        media_url = clip_data.get('url')
        if not media_url:
            raise ExtractorError('Can not extract media data.', expected=True)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id=clip_id, ext='mp4')

        title = clip.get('clip_title')
        if not title:
            # The page download is non-fatal, so only search it when it actually
            # succeeded; a failed download does not yield a string to search
            webpage = self._download_webpage(url, clip_id, fatal=False)
            title = self._og_search_title(webpage) if webpage else None

        return {
            'id': clip_id,
            'title': title,
            'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
            'timestamp': parse_iso8601(clip.get('created_at')),
            'view_count': int_or_none(clip.get('clip_views_count')),
            'formats': formats,
            'subtitles': subtitles,
        }

View file

@ -0,0 +1,71 @@
from .common import InfoExtractor
from ..utils import merge_dicts, unified_timestamp, url_or_none
from ..utils.traversal import traverse_obj
class ZetlandDKArticleIE(InfoExtractor):
    """Extractor for audio articles on www.zetland.dk."""

    _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
    _TESTS = [{
        'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
        'info_dict': {
            'id': 'sO9aq2MY-a81VP3BY-66e69',
            'ext': 'mp3',
            'modified_date': '20240118',
            'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
            'upload_date': '20240116',
            'uploader_id': 'a81VP3BY',
            'modified_timestamp': 1705568739,
            'release_timestamp': 1705377592,
            'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
            'uploader': 'Helle Fuusager',
            'release_date': '20240116',
            'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
            'description': 'md5:9619d426772c133f5abb26db27f26a01',
            'timestamp': 1705377592,
            'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
        },
    }]

    def _real_extract(self, url):
        """Collect audio formats and metadata for one article.

        Metadata is gathered from the URL, the Next.js story state, the
        Next.js `metaInfo`, the HTML meta tags and JSON-LD, and handed to
        `merge_dicts` in that order.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        uploader_id = mobj.group('uploader_id')

        webpage = self._download_webpage(url, display_id)
        page_props = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
        story = traverse_obj(page_props, ('initialState', 'consume', 'story', 'story'))

        # Every valid audio file URL becomes one audio-only format
        audio_urls = traverse_obj(story, ('story_content', 'meta', 'audioFiles', ..., {url_or_none}))
        formats = [{'url': audio_url, 'vcodec': 'none'} for audio_url in audio_urls]

        url_info = {
            'id': display_id,
            'formats': formats,
            'uploader_id': uploader_id,
        }
        story_info = traverse_obj(story, {
            'title': ((('story_content', 'content', 'title'), 'title'), {str}),
            'uploader': ('sharer', 'name'),
            'uploader_id': ('sharer', 'sharer_id'),
            'description': ('story_content', 'content', 'socialDescription'),
            'series_id': ('story_content', 'meta', 'seriesId'),
            'release_timestamp': ('published_at', {unified_timestamp}),
            'modified_timestamp': ('revised_at', {unified_timestamp}),
        }, get_all=False)
        meta_info = traverse_obj(page_props, ('metaInfo', {
            'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
            'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
            'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
            'uploader_url': ('ld', 'author', 'url', {url_or_none}),
            'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
            'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
            'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
            'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
        }), get_all=False)
        html_info = {
            'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            'uploader': self._html_search_meta(['author'], webpage),
            'release_timestamp': unified_timestamp(self._html_search_meta(['article:published_time'], webpage)),
        }
        json_ld_info = self._search_json_ld(webpage, display_id, fatal=False)

        return merge_dicts(url_info, story_info, meta_info, html_info, json_ld_info)