mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-16 21:33:19 +00:00
Merge branch 'master' into mutagen-metadata
This commit is contained in:
commit
8b3127cf67
104
.github/workflows/build.yml
vendored
104
.github/workflows/build.yml
vendored
|
@ -107,10 +107,10 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
- uses: conda-incubator/setup-miniconda@v3
|
||||
with:
|
||||
miniforge-variant: Mambaforge
|
||||
use-mamba: true
|
||||
|
@ -121,16 +121,14 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: |
|
||||
sudo apt -y install zip pandoc man sed
|
||||
reqs=$(mktemp)
|
||||
cat > "$reqs" << EOF
|
||||
cat > ./requirements.txt << EOF
|
||||
python=3.10.*
|
||||
pyinstaller
|
||||
cffi
|
||||
brotli-python
|
||||
secretstorage
|
||||
EOF
|
||||
sed -E '/^(brotli|secretstorage).*/d' requirements.txt >> "$reqs"
|
||||
mamba create -n build --file "$reqs"
|
||||
python devscripts/install_deps.py --print \
|
||||
--exclude brotli --exclude brotlicffi \
|
||||
--include secretstorage --include pyinstaller >> ./requirements.txt
|
||||
mamba create -n build --file ./requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -144,9 +142,9 @@ jobs:
|
|||
run: |
|
||||
unset LD_LIBRARY_PATH # Harmful; set by setup-python
|
||||
conda activate build
|
||||
python pyinst.py --onedir
|
||||
python -m bundle.pyinstaller --onedir
|
||||
(cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
|
||||
python pyinst.py
|
||||
python -m bundle.pyinstaller
|
||||
mv ./dist/yt-dlp_linux ./yt-dlp_linux
|
||||
mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
|
||||
|
||||
|
@ -164,13 +162,15 @@ jobs:
|
|||
done
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
yt-dlp
|
||||
yt-dlp.tar.gz
|
||||
yt-dlp_linux
|
||||
yt-dlp_linux.zip
|
||||
compression-level: 0
|
||||
|
||||
linux_arm:
|
||||
needs: process
|
||||
|
@ -201,17 +201,18 @@ jobs:
|
|||
dockerRunArgs: --volume "${PWD}/repo:/repo"
|
||||
install: | # Installing Python 3.10 from the Deadsnakes repo raises errors
|
||||
apt update
|
||||
apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip
|
||||
apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip
|
||||
python3.8 -m pip install -U pip setuptools wheel
|
||||
# Cannot access requirements.txt from the repo directory at this stage
|
||||
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage
|
||||
# Cannot access any files from the repo directory at this stage
|
||||
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi
|
||||
|
||||
run: |
|
||||
cd repo
|
||||
python3.8 -m pip install -U Pyinstaller secretstorage -r requirements.txt # Cached version may be out of date
|
||||
python3.8 devscripts/install_deps.py -o --include build
|
||||
python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date
|
||||
python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
|
||||
python3.8 devscripts/make_lazy_extractors.py
|
||||
python3.8 pyinst.py
|
||||
python3.8 -m bundle.pyinstaller
|
||||
|
||||
if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
|
||||
arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
|
||||
|
@ -224,10 +225,12 @@ jobs:
|
|||
fi
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-linux_${{ matrix.architecture }}
|
||||
path: | # run-on-arch-action designates armv7l as armv7
|
||||
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
|
||||
compression-level: 0
|
||||
|
||||
macos:
|
||||
needs: process
|
||||
|
@ -240,9 +243,10 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: |
|
||||
brew install coreutils
|
||||
python3 -m pip install -U --user pip setuptools wheel
|
||||
python3 devscripts/install_deps.py --user -o --include build
|
||||
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
|
||||
# We need to ignore wheels otherwise we break universal2 builds
|
||||
python3 -m pip install -U --user --no-binary :all: Pyinstaller -r requirements.txt
|
||||
python3 -m pip install -U --user --no-binary :all: -r requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -250,9 +254,9 @@ jobs:
|
|||
python3 devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python3 pyinst.py --target-architecture universal2 --onedir
|
||||
python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
|
||||
(cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
|
||||
python3 pyinst.py --target-architecture universal2
|
||||
python3 -m bundle.pyinstaller --target-architecture universal2
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
|
@ -265,11 +269,13 @@ jobs:
|
|||
[[ "$version" != "$downgraded_version" ]]
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_macos
|
||||
dist/yt-dlp_macos.zip
|
||||
compression-level: 0
|
||||
|
||||
macos_legacy:
|
||||
needs: process
|
||||
|
@ -293,8 +299,8 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: |
|
||||
brew install coreutils
|
||||
python3 -m pip install -U --user pip setuptools wheel
|
||||
python3 -m pip install -U --user Pyinstaller -r requirements.txt
|
||||
python3 devscripts/install_deps.py --user -o --include build
|
||||
python3 devscripts/install_deps.py --user --include pyinstaller
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -302,7 +308,7 @@ jobs:
|
|||
python3 devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python3 pyinst.py
|
||||
python3 -m bundle.pyinstaller
|
||||
mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy
|
||||
|
||||
- name: Verify --update-to
|
||||
|
@ -316,10 +322,12 @@ jobs:
|
|||
[[ "$version" != "$downgraded_version" ]]
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_macos_legacy
|
||||
compression-level: 0
|
||||
|
||||
windows:
|
||||
needs: process
|
||||
|
@ -328,13 +336,14 @@ jobs:
|
|||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with: # 3.8 is used for Win7 support
|
||||
python-version: "3.8"
|
||||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python -m pip install -U pip setuptools wheel py2exe
|
||||
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include py2exe
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -342,10 +351,10 @@ jobs:
|
|||
python devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python setup.py py2exe
|
||||
python -m bundle.py2exe
|
||||
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
|
||||
python pyinst.py
|
||||
python pyinst.py --onedir
|
||||
python -m bundle.pyinstaller
|
||||
python -m bundle.pyinstaller --onedir
|
||||
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
|
||||
|
||||
- name: Verify --update-to
|
||||
|
@ -362,12 +371,14 @@ jobs:
|
|||
}
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp.exe
|
||||
dist/yt-dlp_min.exe
|
||||
dist/yt-dlp_win.zip
|
||||
compression-level: 0
|
||||
|
||||
windows32:
|
||||
needs: process
|
||||
|
@ -376,14 +387,15 @@ jobs:
|
|||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.8"
|
||||
architecture: "x86"
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
python -m pip install -U pip setuptools wheel
|
||||
pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -391,7 +403,7 @@ jobs:
|
|||
python devscripts/make_lazy_extractors.py
|
||||
- name: Build
|
||||
run: |
|
||||
python pyinst.py
|
||||
python -m bundle.pyinstaller
|
||||
|
||||
- name: Verify --update-to
|
||||
if: vars.UPDATE_TO_VERIFICATION
|
||||
|
@ -407,10 +419,12 @@ jobs:
|
|||
}
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
dist/yt-dlp_x86.exe
|
||||
compression-level: 0
|
||||
|
||||
meta_files:
|
||||
if: inputs.meta_files && always() && !cancelled()
|
||||
|
@ -424,7 +438,11 @@ jobs:
|
|||
- windows32
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifact
|
||||
pattern: build-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Make SHA2-SUMS files
|
||||
run: |
|
||||
|
@ -459,8 +477,10 @@ jobs:
|
|||
done
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ github.job }}
|
||||
path: |
|
||||
SHA*SUMS*
|
||||
_update_spec
|
||||
SHA*SUMS*
|
||||
compression-level: 0
|
||||
|
|
4
.github/workflows/core.yml
vendored
4
.github/workflows/core.yml
vendored
|
@ -49,11 +49,11 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
run: |
|
||||
|
|
8
.github/workflows/download.yml
vendored
8
.github/workflows/download.yml
vendored
|
@ -11,11 +11,11 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.9
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
run: python3 ./devscripts/run_tests.py download
|
||||
|
@ -38,11 +38,11 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
continue-on-error: true
|
||||
run: python3 ./devscripts/run_tests.py download
|
||||
|
|
10
.github/workflows/quick-test.yml
vendored
10
.github/workflows/quick-test.yml
vendored
|
@ -11,11 +11,11 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python 3.8
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.8'
|
||||
- name: Install test requirements
|
||||
run: pip install pytest -r requirements.txt
|
||||
run: python3 ./devscripts/install_deps.py --include dev
|
||||
- name: Run tests
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true
|
||||
|
@ -26,10 +26,10 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- name: Install flake8
|
||||
run: pip install flake8
|
||||
run: python3 ./devscripts/install_deps.py -o --include dev
|
||||
- name: Make lazy extractors
|
||||
run: python devscripts/make_lazy_extractors.py
|
||||
run: python3 ./devscripts/make_lazy_extractors.py
|
||||
- name: Run flake8
|
||||
run: flake8 .
|
||||
|
|
6
.github/workflows/release-master.yml
vendored
6
.github/workflows/release-master.yml
vendored
|
@ -6,8 +6,10 @@ on:
|
|||
paths:
|
||||
- "yt_dlp/**.py"
|
||||
- "!yt_dlp/version.py"
|
||||
- "setup.py"
|
||||
- "pyinst.py"
|
||||
- "bundle/*.py"
|
||||
- "pyproject.toml"
|
||||
- "Makefile"
|
||||
- ".github/workflows/build.yml"
|
||||
concurrency:
|
||||
group: release-master
|
||||
permissions:
|
||||
|
|
9
.github/workflows/release-nightly.yml
vendored
9
.github/workflows/release-nightly.yml
vendored
|
@ -18,7 +18,14 @@ jobs:
|
|||
- name: Check for new commits
|
||||
id: check_for_new_commits
|
||||
run: |
|
||||
relevant_files=("yt_dlp/*.py" ':!yt_dlp/version.py' "setup.py" "pyinst.py")
|
||||
relevant_files=(
|
||||
"yt_dlp/*.py"
|
||||
':!yt_dlp/version.py'
|
||||
"bundle/*.py"
|
||||
"pyproject.toml"
|
||||
"Makefile"
|
||||
".github/workflows/build.yml"
|
||||
)
|
||||
echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
|
||||
|
||||
release:
|
||||
|
|
26
.github/workflows/release.yml
vendored
26
.github/workflows/release.yml
vendored
|
@ -71,7 +71,7 @@ jobs:
|
|||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
|
@ -246,15 +246,16 @@ jobs:
|
|||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
sudo apt -y install pandoc man
|
||||
python -m pip install -U pip setuptools wheel twine
|
||||
python -m pip install -U -r requirements.txt
|
||||
python devscripts/install_deps.py -o --include build
|
||||
|
||||
- name: Prepare
|
||||
env:
|
||||
|
@ -266,14 +267,19 @@ jobs:
|
|||
run: |
|
||||
python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
|
||||
python devscripts/make_lazy_extractors.py
|
||||
sed -i -E "s/(name=')[^']+(', # package name)/\1${{ env.pypi_project }}\2/" setup.py
|
||||
sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml
|
||||
|
||||
- name: Build
|
||||
run: |
|
||||
rm -rf dist/*
|
||||
make pypi-files
|
||||
printf '%s\n\n' \
|
||||
'Official repository: <https://github.com/yt-dlp/yt-dlp>' \
|
||||
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new
|
||||
cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md
|
||||
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
|
||||
python setup.py sdist bdist_wheel
|
||||
make clean-cache
|
||||
python -m build --no-isolation .
|
||||
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
|
@ -290,8 +296,12 @@ jobs:
|
|||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifact
|
||||
pattern: build-*
|
||||
merge-multiple: true
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
|
|
10
MANIFEST.in
10
MANIFEST.in
|
@ -1,10 +0,0 @@
|
|||
include AUTHORS
|
||||
include Changelog.md
|
||||
include LICENSE
|
||||
include README.md
|
||||
include completions/*/*
|
||||
include supportedsites.md
|
||||
include yt-dlp.1
|
||||
include requirements.txt
|
||||
recursive-include devscripts *
|
||||
recursive-include test *
|
61
Makefile
61
Makefile
|
@ -6,11 +6,11 @@ doc: README.md CONTRIBUTING.md issuetemplates supportedsites
|
|||
ot: offlinetest
|
||||
tar: yt-dlp.tar.gz
|
||||
|
||||
# Keep this list in sync with MANIFEST.in
|
||||
# Keep this list in sync with pyproject.toml includes/artifacts
|
||||
# intended use: when building a source distribution,
|
||||
# make pypi-files && python setup.py sdist
|
||||
# make pypi-files && python3 -m build -sn .
|
||||
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
|
||||
completions yt-dlp.1 requirements.txt setup.cfg devscripts/* test/*
|
||||
completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*
|
||||
|
||||
.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
|
||||
|
||||
|
@ -21,7 +21,7 @@ clean-test:
|
|||
*.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||
clean-cache:
|
||||
find . \( \
|
||||
-type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
|
||||
|
@ -37,12 +37,15 @@ BINDIR ?= $(PREFIX)/bin
|
|||
MANDIR ?= $(PREFIX)/man
|
||||
SHAREDIR ?= $(PREFIX)/share
|
||||
PYTHON ?= /usr/bin/env python3
|
||||
GNUTAR ?= tar
|
||||
|
||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
||||
|
||||
# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
|
||||
MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
|
||||
# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
|
||||
PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
|
||||
PANDOC_VERSION != $(PANDOC_VERSION_CMD)
|
||||
PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
|
||||
MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
|
||||
MARKDOWN != $(MARKDOWN_CMD)
|
||||
MARKDOWN ?= $(shell $(MARKDOWN_CMD))
|
||||
|
||||
install: lazy-extractors yt-dlp yt-dlp.1 completions
|
||||
mkdir -p $(DESTDIR)$(BINDIR)
|
||||
|
@ -73,24 +76,28 @@ test:
|
|||
offlinetest: codetest
|
||||
$(PYTHON) -m pytest -k "not download"
|
||||
|
||||
# XXX: This is hard to maintain
|
||||
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking
|
||||
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
|
||||
CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
|
||||
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
|
||||
CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
|
||||
CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
|
||||
CODE_FILES != $(CODE_FILES_CMD)
|
||||
CODE_FILES ?= $(shell $(CODE_FILES_CMD))
|
||||
yt-dlp: $(CODE_FILES)
|
||||
mkdir -p zip
|
||||
for d in $(CODE_FOLDERS) ; do \
|
||||
mkdir -p zip/$$d ;\
|
||||
cp -pPR $$d/*.py zip/$$d/ ;\
|
||||
done
|
||||
touch -t 200001010101 zip/yt_dlp/*.py zip/yt_dlp/*/*.py
|
||||
(cd zip && touch -t 200001010101 $(CODE_FILES))
|
||||
mv zip/yt_dlp/__main__.py zip/
|
||||
cd zip ; zip -q ../yt-dlp yt_dlp/*.py yt_dlp/*/*.py __main__.py
|
||||
(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
|
||||
rm -rf zip
|
||||
echo '#!$(PYTHON)' > yt-dlp
|
||||
cat yt-dlp.zip >> yt-dlp
|
||||
rm yt-dlp.zip
|
||||
chmod a+x yt-dlp
|
||||
|
||||
README.md: yt_dlp/*.py yt_dlp/*/*.py devscripts/make_readme.py
|
||||
README.md: $(CODE_FILES) devscripts/make_readme.py
|
||||
COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py
|
||||
|
||||
CONTRIBUTING.md: README.md devscripts/make_contributing.py
|
||||
|
@ -115,24 +122,26 @@ yt-dlp.1: README.md devscripts/prepare_manpage.py
|
|||
pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
|
||||
rm -f yt-dlp.1.temp.md
|
||||
|
||||
completions/bash/yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/bash-completion.in
|
||||
completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
|
||||
mkdir -p completions/bash
|
||||
$(PYTHON) devscripts/bash-completion.py
|
||||
|
||||
completions/zsh/_yt-dlp: yt_dlp/*.py yt_dlp/*/*.py devscripts/zsh-completion.in
|
||||
completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
|
||||
mkdir -p completions/zsh
|
||||
$(PYTHON) devscripts/zsh-completion.py
|
||||
|
||||
completions/fish/yt-dlp.fish: yt_dlp/*.py yt_dlp/*/*.py devscripts/fish-completion.in
|
||||
completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
|
||||
mkdir -p completions/fish
|
||||
$(PYTHON) devscripts/fish-completion.py
|
||||
|
||||
_EXTRACTOR_FILES = $(shell find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py')
|
||||
_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
|
||||
_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
|
||||
_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))
|
||||
yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
|
||||
$(PYTHON) devscripts/make_lazy_extractors.py $@
|
||||
|
||||
yt-dlp.tar.gz: all
|
||||
@tar -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
|
||||
@$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
|
||||
--exclude '*.DS_Store' \
|
||||
--exclude '*.kate-swp' \
|
||||
--exclude '*.pyc' \
|
||||
|
@ -144,12 +153,8 @@ yt-dlp.tar.gz: all
|
|||
-- \
|
||||
README.md supportedsites.md Changelog.md LICENSE \
|
||||
CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
|
||||
Makefile MANIFEST.in yt-dlp.1 README.txt completions \
|
||||
setup.py setup.cfg yt-dlp yt_dlp requirements.txt \
|
||||
devscripts test
|
||||
Makefile yt-dlp.1 README.txt completions .gitignore \
|
||||
setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test
|
||||
|
||||
AUTHORS: .mailmap
|
||||
git shortlog -s -n | cut -f2 | sort > AUTHORS
|
||||
|
||||
.mailmap:
|
||||
git shortlog -s -e -n | awk '!(out[$$NF]++) { $$1="";sub(/^[ \t]+/,""); print}' > .mailmap
|
||||
AUTHORS:
|
||||
git shortlog -s -n HEAD | cut -f2 | sort > AUTHORS
|
||||
|
|
52
README.md
52
README.md
|
@ -167,7 +167,8 @@ ### Differences in default behavior
|
|||
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
|
||||
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
|
||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
|
||||
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
|
||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
|
||||
|
||||
|
||||
# INSTALLATION
|
||||
|
@ -280,7 +281,7 @@ ### Strongly recommended
|
|||
|
||||
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
|
||||
|
||||
There are bugs in ffmpeg that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
||||
There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
|
||||
|
||||
**Important**: What you need is ffmpeg *binary*, **NOT** [the python package of the same name](https://pypi.org/project/ffmpeg)
|
||||
|
||||
|
@ -320,19 +321,21 @@ ### Deprecated
|
|||
## COMPILE
|
||||
|
||||
### Standalone PyInstaller Builds
|
||||
To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). Once you have all the necessary dependencies installed, simply run `pyinst.py`. The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used.
|
||||
To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same architecture (x86/ARM, 32/64 bit) as the Python used. You can run the following commands:
|
||||
|
||||
python3 -m pip install -U pyinstaller -r requirements.txt
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
python3 pyinst.py
|
||||
```
|
||||
python3 devscripts/install_deps.py --include pyinstaller
|
||||
python3 devscripts/make_lazy_extractors.py
|
||||
python3 -m bundle.pyinstaller
|
||||
```
|
||||
|
||||
On some systems, you may need to use `py` or `python` instead of `python3`.
|
||||
|
||||
`pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
|
||||
`bundle/pyinstaller.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which are further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).
|
||||
|
||||
**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.
|
||||
|
||||
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
|
||||
**Important**: Running `pyinstaller` directly **without** using `bundle/pyinstaller.py` is **not** officially supported. This may or may not work correctly.
|
||||
|
||||
### Platform-independent Binary (UNIX)
|
||||
You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
|
||||
|
@ -345,14 +348,17 @@ ### Standalone Py2Exe Builds (Windows)
|
|||
|
||||
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run.
|
||||
|
||||
If you wish to build it anyway, install Python and py2exe, and then simply run `setup.py py2exe`
|
||||
If you wish to build it anyway, install Python (if it is not already installed) and then run the following commands:
|
||||
|
||||
py -m pip install -U py2exe -r requirements.txt
|
||||
py devscripts/make_lazy_extractors.py
|
||||
py setup.py py2exe
|
||||
```
|
||||
py devscripts/install_deps.py --include py2exe
|
||||
py devscripts/make_lazy_extractors.py
|
||||
py -m bundle.py2exe
|
||||
```
|
||||
|
||||
### Related scripts
|
||||
|
||||
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
|
||||
* **`devscripts/update-version.py`** - Update the version number based on current date.
|
||||
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
|
||||
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
|
||||
|
@ -1305,7 +1311,8 @@ # OUTPUT TEMPLATE
|
|||
- `display_id` (string): An alternative identifier for the video
|
||||
- `uploader` (string): Full name of the video uploader
|
||||
- `license` (string): License name the video is licensed under
|
||||
- `creator` (string): The creator of the video
|
||||
- `creators` (list): The creators of the video
|
||||
- `creator` (string): The creators of the video; comma-separated
|
||||
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
|
||||
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
|
||||
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
|
||||
|
@ -1380,11 +1387,15 @@ # OUTPUT TEMPLATE
|
|||
- `track_number` (numeric): Number of the track within an album or a disc
|
||||
- `track_id` (string): Id of the track
|
||||
- `artists` (list): Artist(s) of the track
|
||||
- `composers` (list): Composer(s) of the piece
|
||||
- `artist` (string): Artist(s) of the track; comma-separated
|
||||
- `genres` (list): Genre(s) of the track
|
||||
- `genre` (string): Genre(s) of the track; comma-separated
|
||||
- `composers` (list): Composer(s) of the piece
|
||||
- `composer` (string): Composer(s) of the piece; comma-separated
|
||||
- `album` (string): Title of the album the track belongs to
|
||||
- `album_type` (string): Type of the album
|
||||
- `album_artists` (list): List of all artists appeared on the album
|
||||
- `album_artists` (list): All artists that appeared on the album
|
||||
- `album_artist` (string): All artists that appeared on the album; comma-separated
|
||||
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
|
||||
|
||||
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
|
||||
|
@ -1762,11 +1773,11 @@ # MODIFYING METADATA
|
|||
`description`, `synopsis` | `description`
|
||||
`purl`, `comment` | `webpage_url`
|
||||
`track` | `track_number`
|
||||
`artist` | `artists`, `creator`, `uploader` or `uploader_id`
|
||||
`composer` | `composers`
|
||||
`genre` | `genres`
|
||||
`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
|
||||
`composer` | `composer` or `composers`
|
||||
`genre` | `genre` or `genres`
|
||||
`album` | `album`
|
||||
`album_artist` | `album_artists`
|
||||
`album_artist` | `album_artist` or `album_artists`
|
||||
`disc` | `disc_number`
|
||||
`show` | `series`
|
||||
`season_number` | `season_number`
|
||||
|
@ -1890,6 +1901,9 @@ #### nhkradirulive (NHK らじる★らじる LIVE)
|
|||
#### nflplusreplay
|
||||
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
|
||||
|
||||
#### jiosaavn
|
||||
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||
|
|
1
bundle/__init__.py
Normal file
1
bundle/__init__.py
Normal file
|
@ -0,0 +1 @@
|
|||
# Empty file
|
59
bundle/py2exe.py
Executable file
59
bundle/py2exe.py
Executable file
|
@ -0,0 +1,59 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import warnings
|
||||
|
||||
from py2exe import freeze
|
||||
|
||||
from devscripts.utils import read_version
|
||||
|
||||
VERSION = read_version()
|
||||
|
||||
|
||||
def main():
    """Freeze yt-dlp into a Windows executable using py2exe.

    Emits a warning about the limitations of py2exe builds, then passes the
    build configuration to ``py2exe.freeze``.

    Returns:
        Whatever ``freeze`` returns.
    """
    # The PyInstaller build script now lives in ``bundle/`` next to this file;
    # the old top-level "pyinst.py" no longer exists, so point users at the
    # current location.
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "bundle/pyinstaller.py" to build using pyinstaller instead')

    return freeze(
        console=[{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        version_info={
            'version': VERSION,
            'description': 'A youtube-dl fork with additional features and patches',
            'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
            'product_name': 'yt-dlp',
            'product_version': VERSION,
        },
        options={
            'bundle_files': 0,
            'compressed': 1,
            'optimize': 2,
            'dist_dir': './dist',
            'excludes': [
                # py2exe cannot import Crypto
                'Crypto',
                'Cryptodome',
                # py2exe appears to confuse this with our socks library.
                # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
                'urllib3.contrib.socks'
            ],
            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
            # Modules that are only imported dynamically must be added here
            'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                         'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
        },
        zipfile=None,
    )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
2
pyinst.py → bundle/pyinstaller.py
Normal file → Executable file
2
pyinst.py → bundle/pyinstaller.py
Normal file → Executable file
|
@ -4,7 +4,7 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import platform
|
||||
|
66
devscripts/install_deps.py
Executable file
66
devscripts/install_deps.py
Executable file
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from devscripts.tomlparse import parse_toml
|
||||
from devscripts.utils import read_file
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command line of the dependency installer.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``exclude``, ``include``,
        ``only_optional``, ``print`` and ``user`` attributes.
    """
    cli = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
    cli.add_argument(
        'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml',
        help='Input file (default: %(default)s)')

    # Options that may be given several times and collect their values
    repeatable = (
        ('-e', '--exclude', 'REQUIREMENT', 'Exclude a required dependency'),
        ('-i', '--include', 'GROUP', 'Include an optional dependency group'),
    )
    for short_flag, long_flag, placeholder, description in repeatable:
        cli.add_argument(
            short_flag, long_flag, metavar=placeholder, action='append', help=description)

    # Plain on/off switches
    switches = (
        ('-o', '--only-optional', 'Only install optional dependencies'),
        ('-p', '--print', 'Only print a requirements.txt to stdout'),
        ('-u', '--user', 'Install with pip as --user'),
    )
    for short_flag, long_flag, description in switches:
        cli.add_argument(short_flag, long_flag, action='store_true', help=description)

    return cli.parse_args()
|
||||
|
||||
|
||||
def main():
    """Resolve the requested dependency set and print or install it.

    Reads the project table from the TOML input, drops excluded required
    dependencies, appends the requested optional groups and then either
    prints the result (one requirement per line) or installs it with pip.

    Returns:
        ``None`` when only printing, otherwise the exit code of pip.
    """
    options = parse_args()
    project = parse_toml(read_file(options.input))['project']
    required = project['dependencies']
    selected = [] if options.only_optional else required.copy()

    for excluded in options.exclude or []:
        for requirement in required:
            # Leading distribution name, without version specifiers/markers
            bare_name = re.match(r'[\w-]+', requirement)[0]
            if requirement not in selected:
                continue
            if excluded == requirement or excluded.lower() == bare_name.lower():
                selected.remove(requirement)

    extras = project['optional-dependencies']
    for group_name in options.include or []:
        # Unknown or empty groups contribute nothing
        selected.extend(extras.get(group_name) or [])

    if options.print:
        for requirement in selected:
            print(requirement)
        return

    command = [
        sys.executable, '-m', 'pip', 'install', '-U',
        *(['--user'] if options.user else []),
        *selected,
    ]
    return subprocess.call(command)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
189
devscripts/tomlparse.py
Executable file
189
devscripts/tomlparse.py
Executable file
|
@ -0,0 +1,189 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Simple parser for spec compliant toml files
|
||||
|
||||
A simple toml parser for files that comply with the spec.
|
||||
Should only be used to parse `pyproject.toml` for `install_deps.py`.
|
||||
|
||||
IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
|
||||
# Optional run of horizontal whitespace (spaces and tabs only, never newlines)
WS = r'(?:[\ \t]*)'
# A single-line string: double-quoted (backslash escapes allowed) or single-quoted (no escapes)
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key component: a quoted string, or a bare key of letters, digits, `_` and `-`
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
# A full key: one or more components joined by dots, with optional whitespace around each
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
# The `=` of a key/value pair plus any horizontal whitespace after it
EQUALS_RE = re.compile(rf'={WS}')
# Compiled form of WS
WS_RE = re.compile(WS)

# Table header at the start of a line: `[path]` or `[[path]]`;
# the `is_list` group captures the second opening bracket when present
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# Start of the next expression on a line: a table header or a `key =` prefix
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)

# Whitespace between array items: may span several lines and contain `#` comments
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# Any other (unquoted) value: everything up to a `,`, `}`, `]`, tab, newline or `#`
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
|
||||
|
||||
|
||||
def parse_key(value: str):
    """Yield the individual components of a (possibly dotted) key.

    Double-quoted components are decoded with ``json.loads``, single-quoted
    components have their quotes stripped, bare components are yielded as-is.
    """
    for token in (found[0] for found in SINGLE_KEY_RE.finditer(value)):
        opener = token[0]
        if opener == '"':
            yield json.loads(token)
        elif opener == "'":
            yield token[1:-1]
        else:
            yield token
|
||||
|
||||
|
||||
def get_target(root: dict, paths: list[str], is_list=False):
    """Walk ``paths`` down from ``root`` and return the dict found at the end.

    Missing keys are created on the way. When ``is_list`` is set, the last
    path component refers to a list and a fresh dict is appended to it and
    returned. When an intermediate component is a list, its last element is
    descended into.

    Returns:
        The dict that the full path designates.
    """
    node = root
    depth = len(paths)

    for position, name in enumerate(paths, 1):
        wants_list = is_list and position == depth
        child = node.get(name)
        if child is None:
            child = [] if wants_list else {}
            node[name] = child

        if isinstance(child, dict):
            node = child
            continue

        if wants_list:
            # Start a new element of the array of tables
            node = {}
            child.append(node)
        else:
            # Continue inside the most recently added element
            node = child[-1]

    assert isinstance(node, dict)
    return node
|
||||
|
||||
|
||||
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Drive the parsing of a bracketed, comma-separated construct.

    ``index`` points at the opening bracket. The generator yields
    ``(True, index)`` for every item start; the caller parses the item and
    sends back the index just past it. Once the closing ``end`` character is
    reached it yields ``(False, index_after_end)``.
    """
    def skip_ws(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening bracket and any whitespace after it
    index = skip_ws(index + 1)

    while data[index] != end:
        index = yield True, index
        index = skip_ws(index)
        if data[index] == ',':
            index += 1
        index = skip_ws(index)

    assert data[index] == end
    yield False, index + 1
|
||||
|
||||
|
||||
def parse_value(data: str, index: int):
    """Parse the value starting at ``data[index]``.

    Handles arrays, inline tables, single-line strings and, for anything
    else, tries a sequence of scalar conversions.

    Returns:
        A ``(end_index, value)`` tuple.
    """
    opener = data[index]

    if opener == '[':
        items = []
        cursor = parse_enclosed(data, index, ']', LIST_WS_RE)
        more, index = next(cursor)
        while more:
            index, item = parse_value(data, index)
            items.append(item)
            more, index = cursor.send(index)
        return index, items

    if opener == '{':
        table = {}
        cursor = parse_enclosed(data, index, '}', WS_RE)
        more, index = next(cursor)
        while more:
            # parse_kv_pair stores the pair in `table` and reports where it ended
            more, index = cursor.send(parse_kv_pair(data, index, table))
        return index, table

    if quoted := STRING_RE.match(data, index):
        literal = quoted[0]
        if literal[0] == '"':
            return quoted.end(), json.loads(literal)
        return quoted.end(), literal[1:-1]

    leftover = LEFTOVER_VALUE_RE.match(data, index)
    assert leftover
    value = leftover[0].strip()
    converters = (
        int,
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        # Never raises: maps anything that is not a boolean literal to None
        {'true': True, 'false': False}.get,
    )
    for convert in converters:
        try:
            value = convert(value)
        except Exception:
            continue
        break

    return leftover.end(), value
|
||||
|
||||
|
||||
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one ``key = value`` pair at ``index`` and store it in ``target``.

    Dotted keys create/descend into nested tables of ``target``.

    Returns:
        The index just past the value, or ``None`` if no key starts at ``index``.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    *parents, leaf = parse_key(key_match[0])

    equals = EQUALS_RE.match(data, key_match.end())
    assert equals

    end, parsed = parse_value(data, equals.end())
    get_target(target, parents)[leaf] = parsed
    return end
|
||||
|
||||
|
||||
def parse_toml(data: str):
    """Parse the text of a TOML document into nested dicts and lists.

    Repeatedly searches for the next table header or key/value pair; headers
    switch the table that subsequent pairs are written into.

    Returns:
        The root table as a ``dict``.
    """
    document = {}
    current = document
    cursor = 0

    while (found := EXPRESSION_RE.search(data, cursor)) is not None:
        if found.group('subtable'):
            cursor = found.end()
            dotted, list_marker = found.group('path', 'is_list')
            current = get_target(document, list(parse_key(dotted)), bool(list_marker))
        else:
            cursor = parse_kv_pair(data, found.start(), current)
            assert cursor is not None

    return document
|
||||
|
||||
|
||||
def main():
    """Read the TOML file named on the command line and print it as JSON."""
    import argparse
    from pathlib import Path

    cli = argparse.ArgumentParser()
    cli.add_argument('infile', type=Path, help='The TOML file to read as input')
    infile = cli.parse_args().infile

    with infile.open('r', encoding='utf-8') as handle:
        text = handle.read()

    def default(obj):
        # Date/time values are emitted in ISO format; anything else becomes null
        if not isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return None
        return obj.isoformat()

    print(json.dumps(parse_toml(text), default=default))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
121
pyproject.toml
121
pyproject.toml
|
@ -1,5 +1,118 @@
|
|||
[build-system]
|
||||
build-backend = 'setuptools.build_meta'
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/5941
|
||||
# https://github.com/pypa/distutils/issues/17
|
||||
requires = ['setuptools > 50']
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "yt-dlp"
|
||||
maintainers = [
|
||||
{name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
|
||||
{name = "Grub4K", email = "contact@grub4k.xyz"},
|
||||
{name = "bashonly", email = "bashonly@protonmail.com"},
|
||||
]
|
||||
description = "A youtube-dl fork with additional features and patches"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
keywords = [
|
||||
"youtube-dl",
|
||||
"video-downloader",
|
||||
"youtube-downloader",
|
||||
"sponsorblock",
|
||||
"youtube-dlc",
|
||||
"yt-dlp",
|
||||
]
|
||||
license = {file = "LICENSE"}
|
||||
classifiers = [
|
||||
"Topic :: Multimedia :: Video",
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Environment :: Console",
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: Implementation",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: Implementation :: PyPy",
|
||||
"License :: OSI Approved :: The Unlicense (Unlicense)",
|
||||
"Operating System :: OS Independent",
|
||||
]
|
||||
dynamic = ["version"]
|
||||
dependencies = [
|
||||
"brotli; implementation_name=='cpython'",
|
||||
"brotlicffi; implementation_name!='cpython'",
|
||||
"certifi",
|
||||
"mutagen",
|
||||
"pycryptodomex",
|
||||
"requests>=2.31.0,<3",
|
||||
"urllib3>=1.26.17,<3",
|
||||
"websockets>=12.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
]
|
||||
build = [
|
||||
"build",
|
||||
"hatchling",
|
||||
"pip",
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
"flake8",
|
||||
"isort",
|
||||
"pytest",
|
||||
]
|
||||
pyinstaller = ["pyinstaller>=6.3"]
|
||||
py2exe = ["py2exe>=0.12"]
|
||||
|
||||
[project.urls]
|
||||
Documentation = "https://github.com/yt-dlp/yt-dlp#readme"
|
||||
Repository = "https://github.com/yt-dlp/yt-dlp"
|
||||
Tracker = "https://github.com/yt-dlp/yt-dlp/issues"
|
||||
Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators"
|
||||
|
||||
[project.scripts]
|
||||
yt-dlp = "yt_dlp:main"
|
||||
|
||||
[project.entry-points.pyinstaller40]
|
||||
hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs"
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
include = [
|
||||
"/yt_dlp",
|
||||
"/devscripts",
|
||||
"/test",
|
||||
"/.gitignore", # included by default, needed for auto-excludes
|
||||
"/Changelog.md",
|
||||
"/LICENSE", # included as license
|
||||
"/pyproject.toml", # included by default
|
||||
"/README.md", # included as readme
|
||||
"/setup.cfg",
|
||||
"/supportedsites.md",
|
||||
]
|
||||
artifacts = [
|
||||
"/yt_dlp/extractor/lazy_extractors.py",
|
||||
"/completions",
|
||||
"/AUTHORS", # included by default
|
||||
"/README.txt",
|
||||
"/yt-dlp.1",
|
||||
]
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["yt_dlp"]
|
||||
artifacts = ["/yt_dlp/extractor/lazy_extractors.py"]
|
||||
|
||||
[tool.hatch.build.targets.wheel.shared-data]
|
||||
"completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp"
|
||||
"completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp"
|
||||
"completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish"
|
||||
"README.txt" = "share/doc/yt_dlp/README.txt"
|
||||
"yt-dlp.1" = "share/man/man1/yt-dlp.1"
|
||||
|
||||
[tool.hatch.version]
|
||||
path = "yt_dlp/version.py"
|
||||
pattern = "_pkg_version = '(?P<version>[^']+)'"
|
||||
|
|
|
@ -1,8 +0,0 @@
|
|||
mutagen
|
||||
pycryptodomex
|
||||
brotli; implementation_name=='cpython'
|
||||
brotlicffi; implementation_name!='cpython'
|
||||
certifi
|
||||
requests>=2.31.0,<3
|
||||
urllib3>=1.26.17,<3
|
||||
websockets>=12.0
|
|
@ -1,7 +1,3 @@
|
|||
[wheel]
|
||||
universal = true
|
||||
|
||||
|
||||
[flake8]
|
||||
exclude = build,venv,.tox,.git,.pytest_cache
|
||||
ignore = E402,E501,E731,E741,W503
|
||||
|
|
183
setup.py
183
setup.py
|
@ -1,183 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Allow execution from anywhere
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
import subprocess
|
||||
import warnings
|
||||
|
||||
# Prefer setuptools; fall back to distutils (which has no find_packages)
# and record which one is in use for the helpers below.
try:
    from setuptools import Command, find_packages, setup
    setuptools_available = True
except ImportError:
    from distutils.core import Command, setup
    setuptools_available = False

from devscripts.utils import read_file, read_version

# Package version, read via the `_pkg_version` variable
VERSION = read_version(varname='_pkg_version')

DESCRIPTION = 'A youtube-dl fork with additional features and patches'

# Long description: two notice paragraphs followed by the full README text
LONG_DESCRIPTION = '\n\n'.join((
    'Official repository: <https://github.com/yt-dlp/yt-dlp>',
    '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
    read_file('README.md')))

# One requirement specifier per line of requirements.txt
REQUIREMENTS = read_file('requirements.txt').splitlines()
|
||||
|
||||
|
||||
def packages():
    """Return the list of packages to distribute.

    Uses automatic discovery when setuptools is available, otherwise a
    hard-coded list.
    """
    if not setuptools_available:
        return [
            'yt_dlp', 'yt_dlp.extractor', 'yt_dlp.downloader', 'yt_dlp.postprocessor', 'yt_dlp.compat',
        ]

    excluded = ('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts')
    return find_packages(exclude=excluded)
|
||||
|
||||
|
||||
def py2exe_params():
    """Build the keyword arguments used for a py2exe build.

    Warns about the limitations of py2exe builds before returning.

    Returns:
        A dict with the ``console``, ``version_info``, ``options`` and
        ``zipfile`` entries.
    """
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

    console_target = {
        'script': './yt_dlp/__main__.py',
        'dest_base': 'yt-dlp',
        'icon_resources': [(1, 'devscripts/logo.ico')],
    }
    version_info = {
        'version': VERSION,
        'description': DESCRIPTION,
        'comments': LONG_DESCRIPTION.split('\n')[0],
        'product_name': 'yt-dlp',
        'product_version': VERSION,
    }
    excluded_modules = [
        # py2exe cannot import Crypto
        'Crypto',
        'Cryptodome',
        # py2exe appears to confuse this with our socks library.
        # We don't use pysocks and urllib3.contrib.socks would fail to import if tried.
        'urllib3.contrib.socks'
    ]
    # Modules that are only imported dynamically must be added here
    dynamic_modules = [
        'yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
        'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated',
    ]
    build_options = {
        'bundle_files': 0,
        'compressed': 1,
        'optimize': 2,
        'dist_dir': './dist',
        'excludes': excluded_modules,
        'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
        'includes': dynamic_modules,
    }

    return {
        'console': [console_target],
        'version_info': version_info,
        'options': build_options,
        'zipfile': None,
    }
|
||||
|
||||
|
||||
def build_params():
    """Build the keyword arguments used for a regular (non-py2exe) build.

    Collects the data files that exist on disk (warning about missing ones)
    and adds either entry points or a plain script, depending on whether
    setuptools is available.
    """
    install_layout = (
        ('share/bash-completion/completions', ['completions/bash/yt-dlp']),
        ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']),
        ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']),
        ('share/doc/yt_dlp', ['README.txt']),
        ('share/man/man1', ['yt-dlp.1']),
    )

    data_files = []
    for destination, candidates in install_layout:
        present = []
        for path in candidates:
            if os.path.exists(path):
                present.append(path)
                continue
            warnings.warn(f'Skipping file {path} since it is not present. Try running " make pypi-files " first')
        # The destination is listed even when none of its files exist
        data_files.append((destination, present))

    if setuptools_available:
        launcher = {'entry_points': {
            'console_scripts': ['yt-dlp = yt_dlp:main'],
            'pyinstaller40': ['hook-dirs = yt_dlp.__pyinstaller:get_hook_dirs'],
        }}
    else:
        launcher = {'scripts': ['yt-dlp']}

    return {'data_files': data_files, **launcher}
|
||||
|
||||
|
||||
class build_lazy_extractors(Command):
    # Custom setup command that runs the lazy-extractor generator script
    description = 'Build the extractor lazy loading module'
    user_options = []

    def initialize_options(self):
        """No options to initialize."""

    def finalize_options(self):
        """No options to finalize."""

    def run(self):
        """Run devscripts/make_lazy_extractors.py unless in dry-run mode."""
        if not self.dry_run:
            subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py'])
        else:
            print('Skipping build of lazy extractors in dry run mode')
|
||||
|
||||
|
||||
def main():
    """Entry point of setup.py: run a py2exe build or a regular setup().

    When the first command-line argument is ``py2exe`` and a py2exe providing
    ``freeze`` is installed, the build is delegated to ``freeze`` and its
    result returned. Otherwise ``setup()`` is called with the package
    metadata plus the parameters built for the selected mode.
    """
    if sys.argv[1:2] == ['py2exe']:
        params = py2exe_params()
        try:
            from py2exe import freeze
        except ImportError:
            # py2exe without `freeze`: reshape the parameters and fall
            # through to the setup() call below
            import py2exe  # noqa: F401
            warnings.warn('You are using an outdated version of py2exe. Support for this version will be removed in the future')
            params['console'][0].update(params.pop('version_info'))
            params['options'] = {'py2exe': params.pop('options')}
        else:
            return freeze(**params)
    else:
        params = build_params()

    setup(
        name='yt-dlp',  # package name (do not change/remove comment)
        version=VERSION,
        maintainer='pukkandan',
        maintainer_email='pukkandan.ytdlp@gmail.com',
        description=DESCRIPTION,
        long_description=LONG_DESCRIPTION,
        long_description_content_type='text/markdown',
        url='https://github.com/yt-dlp/yt-dlp',
        packages=packages(),
        install_requires=REQUIREMENTS,
        python_requires='>=3.8',
        project_urls={
            'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
            'Source': 'https://github.com/yt-dlp/yt-dlp',
            'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues',
            'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators',
        },
        classifiers=[
            'Topic :: Multimedia :: Video',
            'Development Status :: 5 - Production/Stable',
            'Environment :: Console',
            'Programming Language :: Python',
            'Programming Language :: Python :: 3.8',
            'Programming Language :: Python :: 3.9',
            'Programming Language :: Python :: 3.10',
            'Programming Language :: Python :: 3.11',
            'Programming Language :: Python :: 3.12',
            'Programming Language :: Python :: Implementation',
            'Programming Language :: Python :: Implementation :: CPython',
            'Programming Language :: Python :: Implementation :: PyPy',
            'License :: Public Domain',
            'Operating System :: OS Independent',
        ],
        # Exposes the custom command defined in this file
        cmdclass={'build_lazy_extractors': build_lazy_extractors},
        **params
    )
|
||||
|
||||
|
||||
main()
|
|
@ -10,7 +10,7 @@
|
|||
import yt_dlp.extractor
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_os_name
|
||||
from yt_dlp.utils import preferredencoding, try_call, write_string
|
||||
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
|
||||
|
||||
if 'pytest' in sys.modules:
|
||||
import pytest
|
||||
|
@ -223,6 +223,10 @@ def sanitize(key, value):
|
|||
if test_info_dict.get('display_id') == test_info_dict.get('id'):
|
||||
test_info_dict.pop('display_id')
|
||||
|
||||
# Remove deprecated fields
|
||||
for old in YoutubeDL._deprecated_multivalue_fields.keys():
|
||||
test_info_dict.pop(old, None)
|
||||
|
||||
# release_year may be generated from release_date
|
||||
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
|
||||
test_info_dict.pop('release_year')
|
||||
|
@ -329,3 +333,8 @@ def http_server_port(httpd):
|
|||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
def verify_address_availability(address):
    """Skip the current test when no port can be found for ``address``."""
    port = find_available_port(address)
    if port is not None:
        return
    pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
|
||||
|
|
|
@ -941,7 +941,7 @@ def test_match_filter(self):
|
|||
def get_videos(filter_=None):
|
||||
ydl = YDL({'match_filter': filter_, 'simulate': True})
|
||||
for v in videos:
|
||||
ydl.process_ie_result(v, download=True)
|
||||
ydl.process_ie_result(v.copy(), download=True)
|
||||
return [v['id'] for v in ydl.downloaded_info_dicts]
|
||||
|
||||
res = get_videos()
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
import http.cookiejar
|
||||
import http.server
|
||||
import io
|
||||
import logging
|
||||
import pathlib
|
||||
import random
|
||||
import ssl
|
||||
|
@ -26,7 +27,7 @@
|
|||
from email.message import Message
|
||||
from http.cookiejar import CookieJar
|
||||
|
||||
from test.helper import FakeYDL, http_server_port
|
||||
from test.helper import FakeYDL, http_server_port, verify_address_availability
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import brotli, requests, urllib3
|
||||
from yt_dlp.networking import (
|
||||
|
@ -180,6 +181,12 @@ def do_GET(self):
|
|||
self.send_header('Location', '/a/b/./../../headers')
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
elif self.path == '/redirect_dotsegments_absolute':
|
||||
self.send_response(301)
|
||||
# redirect to /headers but with dot segments before - absolute url
|
||||
self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
|
||||
self.send_header('Content-Length', '0')
|
||||
self.end_headers()
|
||||
elif self.path.startswith('/redirect_'):
|
||||
self._redirect()
|
||||
elif self.path.startswith('/method'):
|
||||
|
@ -345,16 +352,17 @@ def test_percent_encode(self, handler):
|
|||
res.close()
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_remove_dot_segments(self, handler):
|
||||
with handler() as rh:
|
||||
@pytest.mark.parametrize('path', [
|
||||
'/a/b/./../../headers',
|
||||
'/redirect_dotsegments',
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/9020
|
||||
'/redirect_dotsegments_absolute',
|
||||
])
|
||||
def test_remove_dot_segments(self, handler, path):
|
||||
with handler(verbose=True) as rh:
|
||||
# This isn't a comprehensive test,
|
||||
# but it should be enough to check whether the handler is removing dot segments
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
|
||||
assert res.status == 200
|
||||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res.close()
|
||||
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
|
||||
# but it should be enough to check whether the handler is removing dot segments in required scenarios
|
||||
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
|
||||
assert res.status == 200
|
||||
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
|
||||
res.close()
|
||||
|
@ -538,6 +546,9 @@ def test_timeout(self, handler):
|
|||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
# on some systems these loopback addresses we need for testing may not be available
|
||||
# see: https://github.com/yt-dlp/yt-dlp/issues/8890
|
||||
verify_address_availability(source_address)
|
||||
with handler(source_address=source_address) as rh:
|
||||
data = validate_and_send(
|
||||
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
|
||||
|
@ -742,6 +753,25 @@ def test_certificate_nocombined_pass(self, handler):
|
|||
})
|
||||
|
||||
|
||||
class TestRequestHandlerMisc:
    """Misc generic tests for request handlers, not related to request or validation testing"""

    @pytest.mark.parametrize('handler,logger_name', [
        ('Requests', 'urllib3'),
        ('Websockets', 'websockets.client'),
        ('Websockets', 'websockets.server')
    ], indirect=['handler'])
    def test_remove_logging_handler(self, handler, logger_name):
        # Logging handlers may hold a reference to a YoutubeDL instance, so
        # closing the request handler has to detach them again.
        # See: https://github.com/yt-dlp/yt-dlp/issues/8922
        attached = logging.getLogger(logger_name).handlers
        baseline = len(attached)

        rh = handler()
        assert len(attached) == baseline + 1

        rh.close()
        assert len(attached) == baseline
|
||||
|
||||
|
||||
class TestUrllibRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
|
||||
def test_file_urls(self, handler):
|
||||
|
@ -817,6 +847,7 @@ def test_httplib_validation_errors(self, handler, req, match, version_check):
|
|||
assert not isinstance(exc_info.value, TransportError)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
class TestRequestsRequestHandler(TestRequestHandlerBase):
|
||||
@pytest.mark.parametrize('raised,expected', [
|
||||
(lambda: requests.exceptions.ConnectTimeout(), TransportError),
|
||||
|
@ -833,7 +864,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
|
|||
(lambda: requests.exceptions.RequestException(), RequestError)
|
||||
# (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object
|
||||
])
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
|
||||
with handler() as rh:
|
||||
def mock_get_instance(*args, **kwargs):
|
||||
|
@ -867,7 +897,6 @@ def request(self, *args, **kwargs):
|
|||
'3 bytes read, 5 more expected'
|
||||
),
|
||||
])
|
||||
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
|
||||
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
|
||||
from requests.models import Response as RequestsResponse
|
||||
from urllib3.response import HTTPResponse as Urllib3Response
|
||||
|
@ -886,6 +915,21 @@ def mock_read(*args, **kwargs):
|
|||
|
||||
assert exc_info.type is expected
|
||||
|
||||
def test_close(self, handler, monkeypatch):
    # Closing the request handler must close the underlying session
    rh = handler()
    session = rh._get_instance(cookiejar=rh.cookiejar)
    real_close = session.close
    invocations = []

    def mock_close(*args, **kwargs):
        # Record the call, then delegate to the real close()
        invocations.append(True)
        return real_close(*args, **kwargs)

    monkeypatch.setattr(session, 'close', mock_close)
    rh.close()
    assert invocations
|
||||
|
||||
|
||||
def run_validation(handler, error, req, **handler_kwargs):
|
||||
with handler(**handler_kwargs) as rh:
|
||||
|
@ -1195,6 +1239,19 @@ def some_preference(rh, request):
|
|||
assert director.send(Request('http://')).read() == b''
|
||||
assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported'
|
||||
|
||||
def test_close(self, monkeypatch):
    # Closing the director must close the handlers registered with it
    director = RequestDirector(logger=FakeLogger())
    director.add_handler(FakeRH(logger=FakeLogger()))
    invocations = []

    def mock_close(*args, **kwargs):
        invocations.append(True)

    registered = director.handlers[FakeRH.RH_KEY]
    monkeypatch.setattr(registered, 'close', mock_close)
    director.close()
    assert invocations
|
||||
|
||||
|
||||
# XXX: do we want to move this to test_YoutubeDL.py?
|
||||
class TestYoutubeDLNetworking:
|
||||
|
|
|
@ -8,13 +8,9 @@
|
|||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import platform
|
||||
import random
|
||||
import ssl
|
||||
import urllib.error
|
||||
import warnings
|
||||
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.dependencies import certifi
|
||||
|
@ -30,7 +26,6 @@
|
|||
from yt_dlp.networking.exceptions import (
|
||||
HTTPError,
|
||||
IncompleteRead,
|
||||
_CompatHTTPError,
|
||||
)
|
||||
from yt_dlp.socks import ProxyType
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
|
@ -179,11 +174,10 @@ class TestNetworkingExceptions:
|
|||
def create_response(status):
|
||||
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
|
||||
|
||||
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
|
||||
def test_http_error(self, http_error_class):
|
||||
def test_http_error(self):
|
||||
|
||||
response = self.create_response(403)
|
||||
error = http_error_class(response)
|
||||
error = HTTPError(response)
|
||||
|
||||
assert error.status == 403
|
||||
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
|
||||
|
@ -194,80 +188,12 @@ def test_http_error(self, http_error_class):
|
|||
assert data == b'test'
|
||||
assert repr(error) == '<HTTPError 403: Forbidden>'
|
||||
|
||||
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
|
||||
def test_redirect_http_error(self, http_error_class):
|
||||
def test_redirect_http_error(self):
|
||||
response = self.create_response(301)
|
||||
error = http_error_class(response, redirect_loop=True)
|
||||
error = HTTPError(response, redirect_loop=True)
|
||||
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
|
||||
assert error.reason == 'Moved Permanently'
|
||||
|
||||
def test_compat_http_error(self):
|
||||
response = self.create_response(403)
|
||||
error = _CompatHTTPError(HTTPError(response))
|
||||
assert isinstance(error, HTTPError)
|
||||
assert isinstance(error, urllib.error.HTTPError)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def raises_deprecation_warning():
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
warnings.simplefilter('always')
|
||||
yield
|
||||
|
||||
if len(w) == 0:
|
||||
pytest.fail('Did not raise DeprecationWarning')
|
||||
if len(w) > 1:
|
||||
pytest.fail(f'Raised multiple warnings: {w}')
|
||||
|
||||
if not issubclass(w[-1].category, DeprecationWarning):
|
||||
pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
|
||||
w.clear()
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.code == 403
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.getcode() == 403
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.hdrs is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.info() is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.headers is error.response.headers
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.filename == error.response.url
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.url == error.response.url
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert error.geturl() == error.response.url
|
||||
|
||||
# Passthrough file operations
|
||||
with raises_deprecation_warning():
|
||||
assert error.read() == b'test'
|
||||
|
||||
with raises_deprecation_warning():
|
||||
assert not error.closed
|
||||
|
||||
with raises_deprecation_warning():
|
||||
# Technically Response operations are also passed through, which should not be used.
|
||||
assert error.get_header('test') == 'test'
|
||||
|
||||
# Should not raise a warning
|
||||
error.close()
|
||||
|
||||
@pytest.mark.skipif(
|
||||
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
|
||||
def test_compat_http_error_autoclose(self):
|
||||
# Compat HTTPError should not autoclose response
|
||||
response = self.create_response(403)
|
||||
_CompatHTTPError(HTTPError(response))
|
||||
assert not response.closed
|
||||
|
||||
def test_incomplete_read_error(self):
|
||||
error = IncompleteRead(4, 3, cause='test')
|
||||
assert isinstance(error, IncompleteRead)
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
ThreadingTCPServer,
|
||||
)
|
||||
|
||||
from test.helper import http_server_port
|
||||
from test.helper import http_server_port, verify_address_availability
|
||||
from yt_dlp.networking import Request
|
||||
from yt_dlp.networking.exceptions import ProxyError, TransportError
|
||||
from yt_dlp.socks import (
|
||||
|
@ -326,6 +326,7 @@ def test_socks4a_domain_target(self, handler, ctx):
|
|||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks4ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(proxies={'all': f'socks4://{server_address}'},
|
||||
source_address=source_address) as rh:
|
||||
response = ctx.socks_info_request(rh)
|
||||
|
@ -441,6 +442,7 @@ def test_ipv6_socks5_proxy(self, handler, ctx):
|
|||
def test_ipv4_client_source_address(self, handler, ctx):
|
||||
with ctx.socks_server(Socks5ProxyHandler) as server_address:
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
|
||||
response = ctx.socks_info_request(rh)
|
||||
assert response['client_address'][0] == source_address
|
||||
|
|
|
@ -2340,6 +2340,58 @@ def test_traverse_obj(self):
|
|||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||
msg='function on a `re.Match` should give group name as well')
|
||||
|
||||
# Test xml.etree.ElementTree.Element as input obj
|
||||
etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
|
||||
<data>
|
||||
<country name="Liechtenstein">
|
||||
<rank>1</rank>
|
||||
<year>2008</year>
|
||||
<gdppc>141100</gdppc>
|
||||
<neighbor name="Austria" direction="E"/>
|
||||
<neighbor name="Switzerland" direction="W"/>
|
||||
</country>
|
||||
<country name="Singapore">
|
||||
<rank>4</rank>
|
||||
<year>2011</year>
|
||||
<gdppc>59900</gdppc>
|
||||
<neighbor name="Malaysia" direction="N"/>
|
||||
</country>
|
||||
<country name="Panama">
|
||||
<rank>68</rank>
|
||||
<year>2011</year>
|
||||
<gdppc>13600</gdppc>
|
||||
<neighbor name="Costa Rica" direction="W"/>
|
||||
<neighbor name="Colombia" direction="E"/>
|
||||
</country>
|
||||
</data>''')
|
||||
self.assertEqual(traverse_obj(etree, ''), etree,
|
||||
msg='empty str key should return the element itself')
|
||||
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
|
||||
msg='str key should lead all children with that tag name')
|
||||
self.assertEqual(traverse_obj(etree, ...), list(etree),
|
||||
msg='`...` as key should return all children')
|
||||
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
|
||||
msg='function as key should get element as value')
|
||||
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
|
||||
msg='function as key should get index as key')
|
||||
self.assertEqual(traverse_obj(etree, 0), etree[0],
|
||||
msg='int key should return the nth child')
|
||||
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
|
||||
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
|
||||
msg='`@<attribute>` at end of path should give that attribute')
|
||||
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
|
||||
msg='`@<nonexistant>` at end of path should give `None`')
|
||||
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
|
||||
msg='`@` should give the full attribute dict')
|
||||
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
|
||||
msg='`text()` at end of path should give the inner text')
|
||||
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
|
||||
msg='full python xpath features should be supported')
|
||||
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
|
||||
msg='special transformations should act on current element')
|
||||
self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
|
||||
msg='special transformations should act on current element')
|
||||
|
||||
def test_http_header_dict(self):
|
||||
headers = HTTPHeaderDict()
|
||||
headers['ytdl-test'] = b'0'
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
import pytest
|
||||
|
||||
from test.helper import verify_address_availability
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import http.client
|
||||
|
@ -227,6 +229,7 @@ def test_cookies(self, handler):
|
|||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||
def test_source_address(self, handler):
|
||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||
verify_address_availability(source_address)
|
||||
with handler(source_address=source_address) as rh:
|
||||
ws = validate_and_send(rh, Request(self.ws_base_url))
|
||||
ws.send('source_address')
|
||||
|
|
|
@ -40,7 +40,6 @@
|
|||
NoSupportingHandlers,
|
||||
RequestError,
|
||||
SSLError,
|
||||
_CompatHTTPError,
|
||||
network_exceptions,
|
||||
)
|
||||
from .plugins import directories as plugin_directories
|
||||
|
@ -581,6 +580,13 @@ class YoutubeDL:
|
|||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
|
||||
}
|
||||
_deprecated_multivalue_fields = {
|
||||
'album_artist': 'album_artists',
|
||||
'artist': 'artists',
|
||||
'composer': 'composers',
|
||||
'creator': 'creators',
|
||||
'genre': 'genres',
|
||||
}
|
||||
_format_selection_exts = {
|
||||
'audio': set(MEDIA_EXTENSIONS.common_audio),
|
||||
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
|
||||
|
@ -2452,7 +2458,7 @@ def selector_function(ctx):
|
|||
# for extractors with incomplete formats (audio only (soundcloud)
|
||||
# or video only (imgur)) best/worst will fallback to
|
||||
# best/worst {video,audio}-only format
|
||||
matches = formats
|
||||
matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
|
||||
elif seperate_fallback and not ctx['has_merged_format']:
|
||||
# for compatibility with youtube-dl when there is no pre-merged format
|
||||
matches = list(filter(seperate_fallback, formats))
|
||||
|
@ -2641,15 +2647,13 @@ def _fill_common_fields(self, info_dict, final=True):
|
|||
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
|
||||
deprecated_multivalue_fields = {
|
||||
'artist': 'artists',
|
||||
'composer': 'composers',
|
||||
'album_artist': 'album_artists',
|
||||
'genre': 'genres',
|
||||
}
|
||||
for deprecated_field, new_field in deprecated_multivalue_fields.items():
|
||||
if info_dict.get(deprecated_field):
|
||||
info_dict[new_field] = re.split(r', ?', info_dict[deprecated_field])
|
||||
for old_key, new_key in self._deprecated_multivalue_fields.items():
|
||||
if new_key in info_dict and old_key in info_dict:
|
||||
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
|
||||
elif old_value := info_dict.get(old_key):
|
||||
info_dict[new_key] = old_value.split(', ')
|
||||
elif new_value := info_dict.get(new_key):
|
||||
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
|
||||
|
||||
def _raise_pending_errors(self, info):
|
||||
err = info.pop('__pending_error', None)
|
||||
|
@ -3494,7 +3498,8 @@ def ffmpeg_fixup(cndn, msg, cls):
|
|||
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
||||
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
||||
FFmpegFixupM3u8PP)
|
||||
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
|
||||
ffmpeg_fixup(downloader == 'dashsegments'
|
||||
and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
|
||||
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
|
||||
|
||||
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
|
||||
|
@ -4120,8 +4125,6 @@ def urlopen(self, req):
|
|||
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
|
||||
'Try using --legacy-server-connect', cause=e) from e
|
||||
raise
|
||||
except HTTPError as e: # TODO: Remove in a future release
|
||||
raise _CompatHTTPError(e) from e
|
||||
|
||||
def build_request_director(self, handlers, preferences=None):
|
||||
logger = _YDLLogger(self)
|
||||
|
|
|
@ -31,4 +31,4 @@ def get_hidden_imports():
|
|||
hiddenimports = list(get_hidden_imports())
|
||||
print(f'Adding imports: {hiddenimports}')
|
||||
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts']
|
||||
excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle']
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
|
||||
|
@ -70,7 +71,6 @@ def compat_setenv(key, value, env=os.environ):
|
|||
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
|
||||
compat_http_client = http.client
|
||||
compat_http_server = http.server
|
||||
compat_HTTPError = urllib.error.HTTPError
|
||||
compat_input = input
|
||||
compat_integer_types = (int, )
|
||||
compat_itertools_count = itertools.count
|
||||
|
@ -88,7 +88,7 @@ def compat_setenv(key, value, env=os.environ):
|
|||
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
|
||||
compat_tokenize_tokenize = tokenize.tokenize
|
||||
compat_urllib_error = urllib.error
|
||||
compat_urllib_HTTPError = urllib.error.HTTPError
|
||||
compat_urllib_HTTPError = compat_HTTPError
|
||||
compat_urllib_parse = urllib.parse
|
||||
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_quote = urllib.parse.quote
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import glob
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
import io
|
||||
|
@ -23,7 +24,8 @@
|
|||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import functools
|
||||
from .compat import functools # isort: split
|
||||
from .compat import compat_os_name
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
|
@ -31,6 +33,7 @@
|
|||
)
|
||||
from .minicurses import MultilinePrinter, QuietMultilinePrinter
|
||||
from .utils import (
|
||||
DownloadError,
|
||||
Popen,
|
||||
error_to_str,
|
||||
expand_path,
|
||||
|
@ -122,13 +125,14 @@ def _extract_firefox_cookies(profile, container, logger):
|
|||
return YoutubeDLCookieJar()
|
||||
|
||||
if profile is None:
|
||||
search_root = _firefox_browser_dir()
|
||||
search_roots = list(_firefox_browser_dirs())
|
||||
elif _is_path(profile):
|
||||
search_root = profile
|
||||
search_roots = [profile]
|
||||
else:
|
||||
search_root = os.path.join(_firefox_browser_dir(), profile)
|
||||
search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
|
||||
search_root = ', '.join(map(repr, search_roots))
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
|
||||
cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
@ -182,12 +186,21 @@ def _extract_firefox_cookies(profile, container, logger):
|
|||
cursor.connection.close()
|
||||
|
||||
|
||||
def _firefox_browser_dir():
|
||||
def _firefox_browser_dirs():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
return os.path.expanduser('~/.mozilla/firefox')
|
||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
|
||||
else:
|
||||
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
|
||||
|
||||
|
||||
def _firefox_cookie_dbs(roots):
|
||||
for root in map(os.path.abspath, roots):
|
||||
for pattern in ('', '*/', 'Profiles/*/'):
|
||||
yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
|
||||
|
||||
|
||||
def _get_chromium_based_browser_settings(browser_name):
|
||||
|
@ -268,7 +281,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
|||
logger.error(f'{browser_name} does not support profiles')
|
||||
search_root = config['browser_dir']
|
||||
|
||||
cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
|
||||
cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
|
||||
if cookie_database_path is None:
|
||||
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
|
||||
logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
|
||||
|
@ -307,6 +320,12 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
|||
counts['unencrypted'] = unencrypted_cookies
|
||||
logger.debug(f'cookie version breakdown: {counts}')
|
||||
return jar
|
||||
except PermissionError as error:
|
||||
if compat_os_name == 'nt' and error.errno == 13:
|
||||
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
raise
|
||||
finally:
|
||||
if cursor is not None:
|
||||
cursor.connection.close()
|
||||
|
@ -947,7 +966,7 @@ def _get_windows_v10_key(browser_root, logger):
|
|||
References:
|
||||
- [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
|
||||
"""
|
||||
path = _find_most_recently_used_file(browser_root, 'Local State', logger)
|
||||
path = _newest(_find_files(browser_root, 'Local State', logger))
|
||||
if path is None:
|
||||
logger.error('could not find local state file')
|
||||
return None
|
||||
|
@ -1049,17 +1068,20 @@ def _get_column_names(cursor, table_name):
|
|||
return [row[1].decode() for row in table_info]
|
||||
|
||||
|
||||
def _find_most_recently_used_file(root, filename, logger):
|
||||
def _newest(files):
|
||||
return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
|
||||
|
||||
|
||||
def _find_files(root, filename, logger):
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
i, paths = 0, []
|
||||
i = 0
|
||||
with _create_progress_bar(logger) as progress_bar:
|
||||
for curr_root, dirs, files in os.walk(root):
|
||||
for curr_root, _, files in os.walk(root):
|
||||
for file in files:
|
||||
i += 1
|
||||
progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
|
||||
if file == filename:
|
||||
paths.append(os.path.join(curr_root, file))
|
||||
return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)
|
||||
yield os.path.join(curr_root, file)
|
||||
|
||||
|
||||
def _merge_cookie_jars(jars):
|
||||
|
@ -1073,7 +1095,7 @@ def _merge_cookie_jars(jars):
|
|||
|
||||
|
||||
def _is_path(value):
|
||||
return os.path.sep in value
|
||||
return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
|
||||
|
||||
|
||||
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
|
||||
|
|
|
@ -369,7 +369,10 @@ def fin_fragments():
|
|||
|
||||
return output.getvalue().encode()
|
||||
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
if len(fragments) == 1:
|
||||
self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
else:
|
||||
self.download_and_append_fragments(
|
||||
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
|
||||
else:
|
||||
return self.download_and_append_fragments(ctx, fragments, info_dict)
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
ACastChannelIE,
|
||||
)
|
||||
from .acfun import AcFunVideoIE, AcFunBangumiIE
|
||||
from .adn import ADNIE
|
||||
from .adn import ADNIE, ADNSeasonIE
|
||||
from .adobeconnect import AdobeConnectIE
|
||||
from .adobetv import (
|
||||
AdobeTVEmbedIE,
|
||||
|
@ -93,6 +93,7 @@
|
|||
AluraIE,
|
||||
AluraCourseIE
|
||||
)
|
||||
from .amadeustv import AmadeusTVIE
|
||||
from .amara import AmaraIE
|
||||
from .amcnetworks import AMCNetworksIE
|
||||
from .amazon import (
|
||||
|
@ -137,6 +138,10 @@
|
|||
ARDMediathekCollectionIE,
|
||||
ARDIE,
|
||||
)
|
||||
from .art19 import (
|
||||
Art19IE,
|
||||
Art19ShowIE,
|
||||
)
|
||||
from .arte import (
|
||||
ArteTVIE,
|
||||
ArteTVEmbedIE,
|
||||
|
@ -144,6 +149,7 @@
|
|||
ArteTVCategoryIE,
|
||||
)
|
||||
from .arnes import ArnesIE
|
||||
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
|
||||
from .atresplayer import AtresPlayerIE
|
||||
from .atscaleconf import AtScaleConfEventIE
|
||||
from .atvat import ATVAtIE
|
||||
|
@ -251,6 +257,7 @@
|
|||
from .bloomberg import BloombergIE
|
||||
from .bokecc import BokeCCIE
|
||||
from .bongacams import BongaCamsIE
|
||||
from .boosty import BoostyIE
|
||||
from .bostonglobe import BostonGlobeIE
|
||||
from .box import BoxIE
|
||||
from .boxcast import BoxCastVideoIE
|
||||
|
@ -345,6 +352,10 @@
|
|||
ChingariIE,
|
||||
ChingariUserIE,
|
||||
)
|
||||
from .chzzk import (
|
||||
CHZZKLiveIE,
|
||||
CHZZKVideoIE,
|
||||
)
|
||||
from .cinemax import CinemaxIE
|
||||
from .cinetecamilano import CinetecaMilanoIE
|
||||
from .cineverse import (
|
||||
|
@ -363,6 +374,7 @@
|
|||
from .cliprs import ClipRsIE
|
||||
from .closertotruth import CloserToTruthIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .cloudycdn import CloudyCDNIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
|
@ -540,6 +552,7 @@
|
|||
from .eighttracks import EightTracksIE
|
||||
from .einthusan import EinthusanIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
from .elonet import ElonetIE
|
||||
from .elpais import ElPaisIE
|
||||
from .eltrecetv import ElTreceTVIE
|
||||
|
@ -557,6 +570,7 @@
|
|||
EroProfileIE,
|
||||
EroProfileAlbumIE,
|
||||
)
|
||||
from .err import ERRJupiterIE
|
||||
from .ertgr import (
|
||||
ERTFlixCodenameIE,
|
||||
ERTFlixIE,
|
||||
|
@ -581,6 +595,7 @@
|
|||
FacebookPluginsVideoIE,
|
||||
FacebookRedirectURLIE,
|
||||
FacebookReelIE,
|
||||
FacebookAdsIE,
|
||||
)
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
|
@ -603,6 +618,7 @@
|
|||
from .filmweb import FilmwebIE
|
||||
from .firsttv import FirstTVIE
|
||||
from .fivetv import FiveTVIE
|
||||
from .flextv import FlexTVIE
|
||||
from .flickr import FlickrIE
|
||||
from .floatplane import (
|
||||
FloatplaneIE,
|
||||
|
@ -680,6 +696,10 @@
|
|||
GeniusIE,
|
||||
GeniusLyricsIE,
|
||||
)
|
||||
from .getcourseru import (
|
||||
GetCourseRuPlayerIE,
|
||||
GetCourseRuIE
|
||||
)
|
||||
from .gettr import (
|
||||
GettrIE,
|
||||
GettrStreamingIE,
|
||||
|
@ -787,6 +807,7 @@
|
|||
IHeartRadioIE,
|
||||
IHeartRadioPodcastIE,
|
||||
)
|
||||
from .ilpost import IlPostIE
|
||||
from .iltalehti import IltalehtiIE
|
||||
from .imdb import (
|
||||
ImdbIE,
|
||||
|
@ -899,6 +920,7 @@
|
|||
from .kth import KTHIE
|
||||
from .krasview import KrasViewIE
|
||||
from .ku6 import Ku6IE
|
||||
from .kukululive import KukuluLiveIE
|
||||
from .kusi import KUSIIE
|
||||
from .kuwo import (
|
||||
KuwoIE,
|
||||
|
@ -987,6 +1009,11 @@
|
|||
LRTVODIE,
|
||||
LRTStreamIE
|
||||
)
|
||||
from .lsm import (
|
||||
LSMLREmbedIE,
|
||||
LSMLTVEmbedIE,
|
||||
LSMReplayIE
|
||||
)
|
||||
from .lumni import (
|
||||
LumniIE
|
||||
)
|
||||
|
@ -996,7 +1023,7 @@
|
|||
)
|
||||
from .maariv import MaarivIE
|
||||
from .magellantv import MagellanTVIE
|
||||
from .magentamusik360 import MagentaMusik360IE
|
||||
from .magentamusik import MagentaMusikIE
|
||||
from .mailru import (
|
||||
MailRuIE,
|
||||
MailRuMusicIE,
|
||||
|
@ -1098,6 +1125,7 @@
|
|||
MotherlessIE,
|
||||
MotherlessGroupIE,
|
||||
MotherlessGalleryIE,
|
||||
MotherlessUploaderIE,
|
||||
)
|
||||
from .motorsport import MotorsportIE
|
||||
from .moviepilot import MoviepilotIE
|
||||
|
@ -1124,6 +1152,11 @@
|
|||
MusicdexArtistIE,
|
||||
MusicdexPlaylistIE,
|
||||
)
|
||||
from .mx3 import (
|
||||
Mx3IE,
|
||||
Mx3NeoIE,
|
||||
Mx3VolksmusikIE,
|
||||
)
|
||||
from .mxplayer import (
|
||||
MxplayerIE,
|
||||
MxplayerShowIE,
|
||||
|
@ -1216,7 +1249,10 @@
|
|||
NexxIE,
|
||||
NexxEmbedIE,
|
||||
)
|
||||
from .nfb import NFBIE
|
||||
from .nfb import (
|
||||
NFBIE,
|
||||
NFBSeriesIE,
|
||||
)
|
||||
from .nfhsnetwork import NFHSNetworkIE
|
||||
from .nfl import (
|
||||
NFLIE,
|
||||
|
@ -1253,6 +1289,7 @@
|
|||
NicovideoTagURLIE,
|
||||
NiconicoLiveIE,
|
||||
)
|
||||
from .ninaprotocol import NinaProtocolIE
|
||||
from .ninecninemedia import (
|
||||
NineCNineMediaIE,
|
||||
CPTwentyFourIE,
|
||||
|
@ -1263,6 +1300,7 @@
|
|||
NiconicoChannelPlusChannelLivesIE,
|
||||
)
|
||||
from .ninegag import NineGagIE
|
||||
from .ninenews import NineNewsIE
|
||||
from .ninenow import NineNowIE
|
||||
from .nintendo import NintendoIE
|
||||
from .nitter import NitterIE
|
||||
|
@ -1316,6 +1354,12 @@
|
|||
NYTimesIE,
|
||||
NYTimesArticleIE,
|
||||
NYTimesCookingIE,
|
||||
NYTimesCookingRecipeIE,
|
||||
)
|
||||
from .nuum import (
|
||||
NuumLiveIE,
|
||||
NuumTabIE,
|
||||
NuumMediaIE,
|
||||
)
|
||||
from .nuvid import NuvidIE
|
||||
from .nzherald import NZHeraldIE
|
||||
|
@ -1358,6 +1402,7 @@
|
|||
from .orf import (
|
||||
ORFTVthekIE,
|
||||
ORFFM4StoryIE,
|
||||
ORFONIE,
|
||||
ORFRadioIE,
|
||||
ORFPodcastIE,
|
||||
ORFIPTVIE,
|
||||
|
@ -1482,7 +1527,7 @@
|
|||
PuhuTVSerieIE,
|
||||
)
|
||||
from .pr0gramm import Pr0grammIE
|
||||
from .prankcast import PrankCastIE
|
||||
from .prankcast import PrankCastIE, PrankCastPostIE
|
||||
from .premiershiprugby import PremiershipRugbyIE
|
||||
from .presstv import PressTVIE
|
||||
from .projectveritas import ProjectVeritasIE
|
||||
|
@ -1579,6 +1624,7 @@
|
|||
RedBullIE,
|
||||
)
|
||||
from .reddit import RedditIE
|
||||
from .redge import RedCDNLivxIE
|
||||
from .redgifs import (
|
||||
RedGifsIE,
|
||||
RedGifsSearchIE,
|
||||
|
@ -1594,7 +1640,10 @@
|
|||
from .reuters import ReutersIE
|
||||
from .reverbnation import ReverbNationIE
|
||||
from .rheinmaintv import RheinMainTVIE
|
||||
from .rinsefm import RinseFMIE
|
||||
from .rinsefm import (
|
||||
RinseFMIE,
|
||||
RinseFMArtistPlaylistIE,
|
||||
)
|
||||
from .rmcdecouverte import RMCDecouverteIE
|
||||
from .rockstargames import RockstarGamesIE
|
||||
from .rokfin import (
|
||||
|
@ -1710,6 +1759,7 @@
|
|||
)
|
||||
from .scrolller import ScrolllerIE
|
||||
from .seeker import SeekerIE
|
||||
from .sejmpl import SejmIE
|
||||
from .senalcolombia import SenalColombiaLiveIE
|
||||
from .senategov import SenateISVPIE, SenateGovIE
|
||||
from .sendtonews import SendtoNewsIE
|
||||
|
@ -2002,6 +2052,7 @@
|
|||
TrovoChannelClipIE,
|
||||
)
|
||||
from .trtcocuk import TrtCocukVideoIE
|
||||
from .trtworld import TrtWorldIE
|
||||
from .trueid import TrueIDIE
|
||||
from .trunews import TruNewsIE
|
||||
from .truth import TruthIE
|
||||
|
@ -2019,7 +2070,6 @@
|
|||
TuneInPodcastEpisodeIE,
|
||||
TuneInShortenerIE,
|
||||
)
|
||||
from .turbo import TurboIE
|
||||
from .tv2 import (
|
||||
TV2IE,
|
||||
TV2ArticleIE,
|
||||
|
@ -2223,6 +2273,7 @@
|
|||
VikiIE,
|
||||
VikiChannelIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
ViuIE,
|
||||
|
@ -2271,11 +2322,6 @@
|
|||
WashingtonPostIE,
|
||||
WashingtonPostArticleIE,
|
||||
)
|
||||
from .wasdtv import (
|
||||
WASDTVStreamIE,
|
||||
WASDTVRecordIE,
|
||||
WASDTVClipIE,
|
||||
)
|
||||
from .wat import WatIE
|
||||
from .wdr import (
|
||||
WDRIE,
|
||||
|
@ -2454,6 +2500,7 @@
|
|||
Zee5SeriesIE,
|
||||
)
|
||||
from .zeenews import ZeeNewsIE
|
||||
from .zetland import ZetlandDKArticleIE
|
||||
from .zhihu import ZhihuIE
|
||||
from .zingmp3 import (
|
||||
ZingMp3IE,
|
||||
|
|
|
@ -92,6 +92,8 @@ def abematv_license_open(self, url):
|
|||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
|
||||
_USERTOKEN = None
|
||||
_DEVICE_ID = None
|
||||
_MEDIATOKEN = None
|
||||
|
@ -136,11 +138,15 @@ def _get_device_token(self):
|
|||
if self._USERTOKEN:
|
||||
return self._USERTOKEN
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
|
||||
username, _ = self._get_login_info()
|
||||
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
|
||||
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
|
||||
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
|
||||
if AbemaTVBaseIE._USERTOKEN:
|
||||
# try authentication with locally stored token
|
||||
try:
|
||||
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
|
||||
self._get_media_token(True)
|
||||
return
|
||||
except ExtractorError as e:
|
||||
|
@ -159,7 +165,6 @@ def _get_device_token(self):
|
|||
})
|
||||
AbemaTVBaseIE._USERTOKEN = user_data['token']
|
||||
|
||||
add_opener(self._downloader, AbemaLicenseHandler(self))
|
||||
return self._USERTOKEN
|
||||
|
||||
def _get_media_token(self, invalidate=False, to_show=True):
|
||||
|
@ -181,6 +186,37 @@ def _get_media_token(self, invalidate=False, to_show=True):
|
|||
|
||||
return self._MEDIATOKEN
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
auth_cache = {
|
||||
'device_id': AbemaTVBaseIE._DEVICE_ID,
|
||||
'usertoken': AbemaTVBaseIE._USERTOKEN,
|
||||
}
|
||||
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
|
||||
|
||||
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
|
||||
return self._download_json(
|
||||
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
|
||||
|
@ -204,7 +240,6 @@ def _extract_breadcrumb_list(self, webpage, video_id):
|
|||
|
||||
class AbemaTVIE(AbemaTVBaseIE):
|
||||
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
|
||||
_NETRC_MACHINE = 'abematv'
|
||||
_TESTS = [{
|
||||
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
|
||||
'info_dict': {
|
||||
|
@ -253,33 +288,6 @@ class AbemaTVIE(AbemaTVBaseIE):
|
|||
}]
|
||||
_TIMETABLE = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._get_device_token()
|
||||
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
|
||||
self.write_debug('Skipping logging in')
|
||||
return
|
||||
|
||||
if '@' in username: # don't strictly check if it's email address or not
|
||||
ep, method = 'user/email', 'email'
|
||||
else:
|
||||
ep, method = 'oneTimePassword', 'userId'
|
||||
|
||||
login_response = self._download_json(
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
AbemaTVBaseIE._USERTOKEN = login_response['token']
|
||||
self._get_media_token(True)
|
||||
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
|
||||
|
||||
def _real_extract(self, url):
|
||||
# starting download using infojson from this extractor is undefined behavior,
|
||||
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
|
@ -17,17 +18,38 @@
|
|||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ADNIE(InfoExtractor):
|
||||
class ADNBaseIE(InfoExtractor):
|
||||
IE_DESC = 'Animation Digital Network'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = f'https://gw.api.{_BASE}/'
|
||||
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
|
||||
class ADNIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||
|
@ -44,29 +66,35 @@ class ADNIE(InfoExtractor):
|
|||
'season_number': 1,
|
||||
'episode': 'À ce soir !',
|
||||
'episode_number': 1,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 1',
|
||||
},
|
||||
'skip': 'Only available in region (FR, ...)',
|
||||
'skip': 'Only available in French and German speaking Europe',
|
||||
}, {
|
||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
|
||||
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
|
||||
'info_dict': {
|
||||
'id': '23550',
|
||||
'ext': 'mp4',
|
||||
'episode_number': 1,
|
||||
'duration': 1417,
|
||||
'release_date': '20231004',
|
||||
'series': 'The Eminence in Shadow',
|
||||
'season_number': 2,
|
||||
'episode': str,
|
||||
'title': str,
|
||||
'thumbnail': str,
|
||||
'season': 'Season 2',
|
||||
'comment_count': int,
|
||||
'average_rating': float,
|
||||
'description': str,
|
||||
},
|
||||
# 'skip': 'Only available in French and German speaking Europe',
|
||||
}]
|
||||
|
||||
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||
_BASE = 'animationdigitalnetwork.fr'
|
||||
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||
_HEADERS = {}
|
||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||
_POS_ALIGN_MAP = {
|
||||
'start': 1,
|
||||
'end': 3,
|
||||
}
|
||||
_LINE_ALIGN_MAP = {
|
||||
'middle': 8,
|
||||
'end': 4,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, sub_url, video_id):
|
||||
if not sub_url:
|
||||
return None
|
||||
|
@ -116,6 +144,8 @@ def _get_subtitles(self, sub_url, video_id):
|
|||
|
||||
if sub_lang == 'vostf':
|
||||
sub_lang = 'fr'
|
||||
elif sub_lang == 'vostde':
|
||||
sub_lang = 'de'
|
||||
subtitles.setdefault(sub_lang, []).extend([{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(sub),
|
||||
|
@ -147,7 +177,7 @@ def _perform_login(self, username, password):
|
|||
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
|
@ -157,12 +187,15 @@ def _real_extract(self, url):
|
|||
|
||||
user = options['user']
|
||||
if not user.get('hasAccess'):
|
||||
self.raise_login_required()
|
||||
start_date = traverse_obj(options, ('video', 'startDate', {str}))
|
||||
if (parse_iso8601(start_date) or 0) > time.time():
|
||||
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
token = self._download_json(
|
||||
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||
video_id, 'Downloading access token', headers={
|
||||
'x-player-refresh-token': user['refreshToken']
|
||||
'X-Player-Refresh-Token': user['refreshToken'],
|
||||
}, data=b'')['token']
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
|
@ -184,7 +217,9 @@ def _real_extract(self, url):
|
|||
try:
|
||||
links_data = self._download_json(
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
|
@ -232,8 +267,14 @@ def _real_extract(self, url):
|
|||
if format_id == 'vf':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'fr'
|
||||
elif format_id == 'vde':
|
||||
for f in m3u8_formats:
|
||||
f['language'] = 'de'
|
||||
formats.extend(m3u8_formats)
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
|
@ -255,3 +296,40 @@ def _real_extract(self, url):
|
|||
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||
'comment_count': int_or_none(video.get('commentsCount')),
|
||||
}
|
||||
|
||||
|
||||
class ADNSeasonIE(ADNBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
|
||||
_TESTS = [{
|
||||
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
|
||||
'playlist_count': 12,
|
||||
'info_dict': {
|
||||
'id': '911',
|
||||
'title': 'Tokyo Mew Mew New',
|
||||
},
|
||||
# 'skip': 'Only available in French end German speaking Europe',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
|
||||
show = self._download_json(
|
||||
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
|
||||
'Downloading show JSON metadata', headers=self._HEADERS)['show']
|
||||
show_id = str(show['id'])
|
||||
episodes = self._download_json(
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
}, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
})
|
||||
|
||||
def entries():
|
||||
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
|
||||
yield self.url_result(
|
||||
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
|
||||
ADNIE, episode_id)
|
||||
|
||||
return self.playlist_result(entries(), show_id, show.get('title'))
|
||||
|
|
|
@ -93,7 +93,7 @@ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
|||
resource = self._get_mvpd_resource(
|
||||
requestor_id, theplatform_metadata['title'],
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
theplatform_metadata['ratings'][0]['rating'])
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
|
|
77
yt_dlp/extractor/amadeustv.py
Normal file
77
yt_dlp/extractor/amadeustv.py
Normal file
|
@ -0,0 +1,77 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AmadeusTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
|
||||
'info_dict': {
|
||||
'id': '5576678021301411311',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
|
||||
'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
|
||||
'duration': 1264.8,
|
||||
'upload_date': '20230918',
|
||||
'timestamp': 1695034800,
|
||||
'display_id': '65091a87ff85af59d9fc54c3',
|
||||
'view_count': int,
|
||||
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
|
||||
video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
|
||||
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract actual video ID')
|
||||
|
||||
video_data = self._download_json(
|
||||
f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
|
||||
video_id, headers={'Referer': 'http://www.amadeus.tv/'})
|
||||
|
||||
formats = []
|
||||
for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})):
|
||||
if not url_or_none(video.get('url')):
|
||||
continue
|
||||
formats.append({
|
||||
**traverse_obj(video, {
|
||||
'url': 'url',
|
||||
'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'filesize': (('totalSize', 'size'), {int_or_none}),
|
||||
'vcodec': ('videoStreamList', 0, 'codec'),
|
||||
'acodec': ('audioStreamList', 0, 'codec'),
|
||||
'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
|
||||
}, get_all=False),
|
||||
'http_headers': {'Referer': 'http://www.amadeus.tv/'},
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('videoInfo', 'basicInfo', 'name', {str}),
|
||||
'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
|
||||
'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
|
||||
}, get_all=False),
|
||||
**traverse_obj(nuxt_data, ('item', {
|
||||
'title': (('title', 'title_en', 'title_cn'), {str}),
|
||||
'description': (('description', 'description_en', 'description_cn'), {str}),
|
||||
'timestamp': ('date', {parse_iso8601}),
|
||||
'view_count': ('view', {int_or_none}),
|
||||
}), get_all=False),
|
||||
}
|
|
@ -78,14 +78,14 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
|
|||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ant1news.gr/afieromata/article/549468/o-tzeims-mpont-sta-meteora-oi-apeiles-kai-o-xesikomos-ton-kalogeron',
|
||||
'md5': '294f18331bb516539d72d85a82887dcc',
|
||||
'md5': '57eb8d12181f0fa2b14b0b138e1de9b6',
|
||||
'info_dict': {
|
||||
'id': '_xvg/m_cmbatw=',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:a93e8ecf2e4073bfdffcb38f59945411',
|
||||
'timestamp': 1603092840,
|
||||
'upload_date': '20201019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/640/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
'timestamp': 1666166520,
|
||||
'upload_date': '20221019',
|
||||
'thumbnail': 'https://ant1media.azureedge.net/imgHandler/1920/756206d2-d640-40e2-b201-3555abdfc0db.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn',
|
||||
|
@ -117,7 +117,7 @@ class Ant1NewsGrEmbedIE(AntennaBaseIE):
|
|||
_BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player'
|
||||
_VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)']
|
||||
_API_PATH = '/news/templates/data/jsonPlayer'
|
||||
_API_PATH = '/templates/data/jsonPlayer'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.antenna.gr/templates/pages/player?cid=3f_li_c_az_jw_y_u=&w=670&h=377',
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
bug_reports_message,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
make_archive_id,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
|
@ -233,17 +235,18 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||
(?:(?:beta|www)\.)?ardmediathek\.de/
|
||||
(?:[^/]+/)?
|
||||
(?:player|live|video)/
|
||||
(?:(?P<display_id>[^?#]+)/)?
|
||||
(?:[^?#]+/)?
|
||||
(?P<id>[a-zA-Z0-9]+)
|
||||
/?(?:[?#]|$)'''
|
||||
_GEO_COUNTRIES = ['DE']
|
||||
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
|
||||
'info_dict': {
|
||||
'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen',
|
||||
'id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
|
||||
'id': '12939099',
|
||||
'title': 'Liebe auf vier Pfoten',
|
||||
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
|
||||
'duration': 5222,
|
||||
|
@ -255,7 +258,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||
'series': 'Filme im MDR',
|
||||
'age_limit': 0,
|
||||
'channel': 'MDR',
|
||||
'_old_archive_ids': ['ardbetamediathek 12939099'],
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||
|
@ -276,37 +279,37 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||
'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'md5': '1e73ded21cb79bac065117e80c81dc88',
|
||||
'info_dict': {
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'id': '10049223',
|
||||
'ext': 'mp4',
|
||||
'title': 'tagesschau, 20:00 Uhr',
|
||||
'timestamp': 1636398000,
|
||||
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
|
||||
'upload_date': '20211108',
|
||||
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
|
||||
'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
|
||||
'duration': 915,
|
||||
'episode': 'tagesschau, 20:00 Uhr',
|
||||
'series': 'tagesschau',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
|
||||
'channel': 'ARD-Aktuell',
|
||||
'_old_archive_ids': ['ardbetamediathek 10049223'],
|
||||
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'md5': 'c428b9effff18ff624d4f903bda26315',
|
||||
'info_dict': {
|
||||
'id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'id': '94834686',
|
||||
'ext': 'mp4',
|
||||
'duration': 2700,
|
||||
'episode': '7 Tage ... unter harten Jungs',
|
||||
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
|
||||
'upload_date': '20231005',
|
||||
'timestamp': 1696491171,
|
||||
'display_id': '7-tage/7-tage-unter-harten-jungs/hr-fernsehen',
|
||||
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
|
||||
'series': '7 Tage ...',
|
||||
'channel': 'HR',
|
||||
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
|
||||
'title': '7 Tage ... unter harten Jungs',
|
||||
'_old_archive_ids': ['ardbetamediathek 94834686'],
|
||||
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
|
@ -357,13 +360,39 @@ def _extract_episode_info(self, title):
|
|||
}), get_all=False)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||
display_id = self._match_id(url)
|
||||
query = {'embedded': 'false', 'mcV6': 'true'}
|
||||
headers = {}
|
||||
|
||||
if self._get_cookies(self._TOKEN_URL).get('ams'):
|
||||
token = self._download_json(
|
||||
self._TOKEN_URL, display_id, 'Fetching token for age verification',
|
||||
'Unable to fetch age verification token', fatal=False)
|
||||
id_token = traverse_obj(token, ('idToken', {str}))
|
||||
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
|
||||
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
|
||||
if not user_id:
|
||||
self.report_warning('Unable to extract token, continuing without authentication')
|
||||
else:
|
||||
headers['x-authorization'] = f'Bearer {id_token}'
|
||||
query['userId'] = user_id
|
||||
if decoded_token.get('age_rating') != 18:
|
||||
self.report_warning('Account is not verified as 18+; video may be unavailable')
|
||||
|
||||
page_data = self._download_json(
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}', video_id, query={
|
||||
'embedded': 'false',
|
||||
'mcV6': 'true',
|
||||
})
|
||||
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
|
||||
display_id, query=query, headers=headers)
|
||||
|
||||
# For user convenience we use the old contentId instead of the longer crid
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
|
||||
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
|
||||
if old_id is not None:
|
||||
video_id = str(old_id)
|
||||
archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
|
||||
else:
|
||||
self.report_warning(f'Could not extract contentId{bug_reports_message()}')
|
||||
video_id = display_id
|
||||
archive_ids = None
|
||||
|
||||
player_data = traverse_obj(
|
||||
page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False)
|
||||
|
@ -371,7 +400,7 @@ def _real_extract(self, url):
|
|||
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
|
||||
|
||||
if player_data.get('blockedByFsk'):
|
||||
self.raise_no_formats('This video is only available after 22:00', expected=True)
|
||||
self.raise_login_required('This video is only available for age verified users or after 22:00')
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
|
@ -419,8 +448,6 @@ def _real_extract(self, url):
|
|||
})
|
||||
|
||||
age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
|
||||
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
@ -438,7 +465,7 @@ def _real_extract(self, url):
|
|||
'channel': 'clipSourceName',
|
||||
})),
|
||||
**self._extract_episode_info(page_data.get('title')),
|
||||
'_old_archive_ids': [make_archive_id(ARDBetaMediathekIE, old_id)],
|
||||
'_old_archive_ids': archive_ids,
|
||||
}
|
||||
|
||||
|
||||
|
|
303
yt_dlp/extractor/art19.py
Normal file
303
yt_dlp/extractor/art19.py
Normal file
|
@ -0,0 +1,303 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Art19IE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
|
||||
_VALID_URL = [
|
||||
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
|
||||
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
|
||||
'info_dict': {
|
||||
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
|
||||
'ext': 'mp3',
|
||||
'title': 'Why Did DeSantis Drop Out?',
|
||||
'series': 'The Daily Briefing',
|
||||
'release_timestamp': 1705941275,
|
||||
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
|
||||
'episode': 'Episode 582',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
|
||||
'upload_date': '20240122',
|
||||
'timestamp': 1705940815,
|
||||
'episode_number': 582,
|
||||
'modified_date': '20240122',
|
||||
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
|
||||
'modified_timestamp': 1705941275,
|
||||
'release_date': '20240122',
|
||||
'duration': 527.4,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'info_dict': {
|
||||
'id': '8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'ext': 'mp3',
|
||||
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
|
||||
'modified_date': '20240116',
|
||||
'upload_date': '20240105',
|
||||
'modified_timestamp': 1705435802,
|
||||
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
|
||||
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
|
||||
'release_timestamp': 1705305660,
|
||||
'release_date': '20240115',
|
||||
'timestamp': 1704481536,
|
||||
'episode_number': 88,
|
||||
'series': 'Scamfluencers',
|
||||
'duration': 2588.37501,
|
||||
'episode': 'Episode 88',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
|
||||
'info_dict': {
|
||||
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
|
||||
'ext': 'mp3',
|
||||
'title': "'Verstappen wordt een synoniem voor Formule 1'",
|
||||
'season': 'Seizoen 6',
|
||||
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
|
||||
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
|
||||
'duration': 3061.82111,
|
||||
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
|
||||
'release_date': '20231126',
|
||||
'modified_timestamp': 1701156004,
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'season_number': 6,
|
||||
'episode_number': 52,
|
||||
'modified_date': '20231128',
|
||||
'upload_date': '20231126',
|
||||
'timestamp': 1701025981,
|
||||
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
|
||||
'series': 'De Boordradio',
|
||||
'release_timestamp': 1701026308,
|
||||
'episode': 'Episode 52',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
|
||||
'info_dict': {
|
||||
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
|
||||
'ext': 'mp3',
|
||||
'title': 'Larry Bucshon announces retirement from congress',
|
||||
'upload_date': '20240115',
|
||||
'episode_number': 148,
|
||||
'episode': 'Episode 148',
|
||||
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
|
||||
'release_date': '20240115',
|
||||
'timestamp': 1705328205,
|
||||
'release_timestamp': 1705329275,
|
||||
'series': 'All INdiana Politics',
|
||||
'modified_date': '20240117',
|
||||
'modified_timestamp': 1705458901,
|
||||
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
|
||||
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
|
||||
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
|
||||
'duration': 1256.18848,
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for episode_id in re.findall(
|
||||
rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage):
|
||||
yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
|
||||
player_metadata = self._download_json(
|
||||
f'https://art19.com/episodes/{episode_id}', episode_id,
|
||||
note='Downloading player metadata', fatal=False,
|
||||
headers={'Accept': 'application/vnd.art19.v0+json'})
|
||||
rss_metadata = self._download_json(
|
||||
f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
|
||||
note='Downloading RSS metadata')
|
||||
|
||||
formats = [{
|
||||
'format_id': 'direct',
|
||||
'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
|
||||
'vcodec': 'none',
|
||||
'acodec': 'mp3',
|
||||
}]
|
||||
for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)):
|
||||
if fmt_id == 'waveform_bin':
|
||||
continue
|
||||
fmt_url = traverse_obj(fmt_data, ('url', {url_or_none}))
|
||||
if not fmt_url:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': fmt_id,
|
||||
'url': fmt_url,
|
||||
'vcodec': 'none',
|
||||
'acodec': fmt_id,
|
||||
'quality': -2 if fmt_id == 'ogg' else -1,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': episode_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(player_metadata, ('episode', {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description_plain', {str}),
|
||||
'episode_id': ('id', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'season_id': ('season_id', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'release_timestamp': ('released_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601})
|
||||
})),
|
||||
**traverse_obj(rss_metadata, ('content', {
|
||||
'title': ('episode_title', {str}),
|
||||
'description': ('episode_description_plain', {str}),
|
||||
'episode_id': ('episode_id', {str}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'season': ('season_title', {str}),
|
||||
'season_id': ('season_id', {str}),
|
||||
'season_number': ('season_number', {int_or_none}),
|
||||
'series': ('series_title', {str}),
|
||||
'series_id': ('series_id', {str}),
|
||||
'thumbnail': ('cover_image', {url_or_none}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
class Art19ShowIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
|
||||
_VALID_URL = [
|
||||
rf'{_VALID_URL_BASE}(?:$|[#?])',
|
||||
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
|
||||
'display_id': 'echt-gebeurd',
|
||||
'title': 'Echt Gebeurd',
|
||||
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
|
||||
'timestamp': 1492642167,
|
||||
'upload_date': '20170419',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:7',
|
||||
},
|
||||
'playlist_mincount': 425,
|
||||
}, {
|
||||
'url': 'https://www.art19.com/shows/echt-gebeurd',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
|
||||
'display_id': 'echt-gebeurd',
|
||||
'title': 'Echt Gebeurd',
|
||||
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
|
||||
'timestamp': 1492642167,
|
||||
'upload_date': '20170419',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:7',
|
||||
},
|
||||
'playlist_mincount': 425,
|
||||
}, {
|
||||
'url': 'https://rss.art19.com/scamfluencers',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
|
||||
'display_id': 'scamfluencers',
|
||||
'title': 'Scamfluencers',
|
||||
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
|
||||
'timestamp': 1647368573,
|
||||
'upload_date': '20220315',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': [],
|
||||
},
|
||||
'playlist_mincount': 90,
|
||||
}, {
|
||||
'url': 'https://art19.com/shows/enthuellt/embed',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
|
||||
'display_id': 'enthuellt',
|
||||
'title': 'Enthüllt',
|
||||
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
|
||||
'timestamp': 1601645860,
|
||||
'upload_date': '20201002',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:10',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
|
||||
'display_id': 'deconstructing-yourself',
|
||||
'title': 'Deconstructing Yourself',
|
||||
'description': 'md5:dab5082b28b248a35476abf64768854d',
|
||||
'timestamp': 1570581181,
|
||||
'upload_date': '20191009',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': 'count:5',
|
||||
},
|
||||
'playlist_mincount': 80,
|
||||
}, {
|
||||
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
|
||||
'display_id': 'the-ben-joravsky-show',
|
||||
'title': 'The Ben Joravsky Show',
|
||||
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
|
||||
'timestamp': 1550875095,
|
||||
'upload_date': '20190222',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
|
||||
},
|
||||
'playlist_mincount': 1900,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield embed URLs found in ``webpage``.

    First yields whatever the parent implementation finds, then one
    ``https://art19.com/shows/<series id>`` URL for every
    ``art19-web-player`` div that carries a ``data-series-id`` attribute.
    """
    yield from super()._extract_embed_urls(url, webpage)

    player_div_re = r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]'
    for player_div in re.finditer(player_div_re, webpage):
        yield f'https://art19.com/shows/{player_div.group(1)}'
|
||||
|
||||
def _real_extract(self, url):
    """Return a playlist of a series' episodes together with series-level metadata."""
    series_id = self._match_id(url)
    api_response = self._download_json(
        f'https://art19.com/series/{series_id}', series_id,
        note='Downloading series metadata',
        headers={'Accept': 'application/vnd.art19.v0+json'})

    # Every episode is handed off to Art19IE through its RSS mp3 URL
    episode_ids = traverse_obj(api_response, ('series', 'episode_ids', ..., {str}))
    entries = [
        self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
        for episode_id in episode_ids
    ]

    series_fields = traverse_obj(api_response, ('series', {
        'id': ('id', {str}),
        'display_id': ('slug', {str}),
        'title': ('title', {str}),
        'description': ('description_plain', {str}),
        'timestamp': ('created_at', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
    }))
    # Tags are read from the top level of the response, not from 'series'
    tags = traverse_obj(api_response, ('tags', ..., 'name', {str}))

    return {
        '_type': 'playlist',
        'entries': entries,
        **series_fields,
        'tags': tags,
    }
|
|
@ -70,7 +70,24 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
|
@ -121,6 +138,16 @@ class ArteTVIE(ArteTVBaseIE):
|
|||
),
|
||||
}
|
||||
|
||||
@staticmethod
def _fix_accessible_subs_locale(subs):
    """Move ``-MAL.m3u8`` subtitle tracks into a separate ``<lang>-acc`` locale.

    ``subs`` maps a language code to a list of subtitle format dicts.
    Tracks whose URL ends with ``-MAL.m3u8`` are filed under
    ``f'{lang}-acc'``; every other track stays under its original
    language. (The ``-acc`` suffix presumably marks accessibility
    subtitles, going by the method name.)

    Returns a new dict; neither ``subs`` nor the track dicts are modified.
    """
    updated_subs = {}
    for lang, sub_formats in subs.items():
        for sub_format in sub_formats:
            # Pick the target locale per track, without touching ``lang``:
            # mutating the loop variable would leak the suffix onto every
            # later track of this language and stack it ('fr-acc-acc').
            # ``or ''`` also covers an explicit ``'url': None``.
            is_accessible = (sub_format.get('url') or '').endswith('-MAL.m3u8')
            target_lang = f'{lang}-acc' if is_accessible else lang
            updated_subs.setdefault(target_lang, []).append(sub_format)
    return updated_subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
|
@ -174,6 +201,7 @@ def _real_extract(self, url):
|
|||
secondary_formats.extend(fmts)
|
||||
else:
|
||||
formats.extend(fmts)
|
||||
subs = self._fix_accessible_subs_locale(subs)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
elif stream['protocol'] in ('HTTPS', 'RTMP'):
|
||||
|
|
168
yt_dlp/extractor/asobichannel.py
Normal file
168
yt_dlp/extractor/asobichannel.py
Normal file
|
@ -0,0 +1,168 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AsobiChannelBaseIE(InfoExtractor):
    # Header sent with every request to the channel.microcms.io API
    _MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}

    def _extract_info(self, metadata):
        """Map a microCMS media object onto the common info-dict fields."""
        field_paths = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('body', {clean_html}),
            'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
            'timestamp': ('publishedAt', {parse_iso8601}),
            'modified_timestamp': ('updatedAt', {parse_iso8601}),
            'channel': ('channel', 'name', {str}),
            'channel_id': ('channel', 'id', {str}),
        }
        return traverse_obj(metadata, field_paths)
|
||||
|
||||
|
||||
class AsobiChannelIE(AsobiChannelBaseIE):
|
||||
IE_NAME = 'asobichannel'
|
||||
IE_DESC = 'ASOBI CHANNEL'
|
||||
|
||||
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
|
||||
'md5': '39df74e872afe032c4eb27b89144fc92',
|
||||
'info_dict': {
|
||||
'id': '1ypp48qd32p',
|
||||
'ext': 'mp4',
|
||||
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
|
||||
'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
|
||||
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
|
||||
'timestamp': 1697098247,
|
||||
'upload_date': '20231012',
|
||||
'modified_timestamp': 1698381162,
|
||||
'modified_date': '20231027',
|
||||
'channel': 'アイドルマスター',
|
||||
'channel_id': 'idolmaster',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
|
||||
'md5': '229fa8fb5c591c75ce8c37a497f113f6',
|
||||
'info_dict': {
|
||||
'id': 'redigiwnjzqj',
|
||||
'ext': 'mp4',
|
||||
'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
|
||||
'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
|
||||
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
|
||||
'modified_timestamp': 1697797125,
|
||||
'modified_date': '20231020',
|
||||
'timestamp': 1697261769,
|
||||
'upload_date': '20231014',
|
||||
'channel': 'アイドルマスター',
|
||||
'channel_id': 'idolmaster',
|
||||
},
|
||||
}]
|
||||
|
||||
_survapi_header = None
|
||||
|
||||
def _real_initialize(self):
    """Fetch an API token and store it as the Authorization header for survapi requests."""
    token_url = 'https://asobichannel-api.asobistore.jp/api/v1/vspf/token'
    api_token = self._download_json(token_url, None, note='Retrieving API token')
    self._survapi_header = {'Authorization': f'Bearer {api_token}'}
|
||||
|
||||
def _process_vod(self, video_id, metadata):
    """Return the ``formats`` for a VOD entry.

    Looks up the content ID from ``metadata`` on the survapi proxy and
    extracts HLS formats from the streaming URL it returns.
    """
    content_id = metadata['contents']['video_id']
    api_url = f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid'
    vod_data = self._download_json(
        api_url, video_id, headers=self._survapi_header, note='Downloading vod data')

    streaming_url = vod_data['ex_content']['streaming_url']
    return {'formats': self._extract_m3u8_formats(streaming_url, video_id)}
|
||||
|
||||
def _process_live(self, video_id, metadata):
    """Return the live-specific fields for a LIVE entry.

    Downloads the event data for the content ID found in ``metadata`` and
    branches on its ``Player_type``:

    * ``'poster'``: reports through ``raise_no_formats`` that the event has
      not started; the result has no formats and is marked ``is_upcoming``.
    * ``'player'``: extracts live HLS formats from the channel's
      ``Custom_live_url``; the result is marked ``is_live``.

    Raises ExtractorError for any other player type.
    """
    content_id = metadata['contents']['video_id']
    event_data = self._download_json(
        f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
        headers=self._survapi_header, note='Downloading event data')

    player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
    if player_type == 'poster':
        self.raise_no_formats('Live event has not yet started', expected=True)
        live_status = 'is_upcoming'
        formats = []
    elif player_type == 'player':
        live_status = 'is_live'
        formats = self._extract_m3u8_formats(
            event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
    else:
        # Needs the f prefix so the offending value is shown in the message
        raise ExtractorError(f'Unsupported player type {player_type!r}')

    return {
        'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
        'live_status': live_status,
        'formats': formats,
    }
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single watch page, dispatching on the media's video type (VOD or LIVE)."""
    video_id = self._match_id(url)
    metadata = self._download_json(
        f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
        headers=self._MICROCMS_HEADER)
    info = self._extract_info(metadata)

    # Only the first element of 'video_type' is consulted
    video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
    processors = {
        'VOD': self._process_vod,
        'LIVE': self._process_live,
    }
    process = processors.get(video_type)
    if process is None:
        raise ExtractorError(f'Unexpected video type {video_type!r}')

    return merge_dicts(info, process(video_id, metadata))
|
||||
|
||||
|
||||
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
|
||||
IE_NAME = 'asobichannel:tag'
|
||||
IE_DESC = 'ASOBI CHANNEL'
|
||||
|
||||
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
|
||||
'info_dict': {
|
||||
'id': 'bjhh-nbcja',
|
||||
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
|
||||
},
|
||||
'playlist_mincount': 16,
|
||||
}, {
|
||||
'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
|
||||
'info_dict': {
|
||||
'id': 'hvm5qw3c6od',
|
||||
'title': 'アイマスMOIW2023ラジオ',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Return a playlist of every media item carrying the tag in ``url``."""
    tag_id = self._match_id(url)
    webpage = self._download_webpage(url, tag_id)

    # The playlist title is read from the page's Next.js data (non-fatal)
    nextjs_data = self._search_nextjs_data(webpage, tag_id, fatal=False)
    title = traverse_obj(nextjs_data, ('props', 'pageProps', 'data', 'name', {str}))

    media = self._download_json(
        f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
        tag_id, headers=self._MICROCMS_HEADER)

    def entries():
        # Items without a truthy 'id' are filtered out by the traversal
        for item in traverse_obj(media, ('contents', lambda _, v: v['id'])):
            watch_url = f'https://asobichannel.asobistore.jp/watch/{item["id"]}'
            yield {
                '_type': 'url',
                'url': watch_url,
                'ie_key': AsobiChannelIE.ie_key(),
                **self._extract_info(item),
            }

    return self.playlist_result(entries(), tag_id, title)
|
|
@ -7,6 +7,7 @@
|
|||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..dependencies import Cryptodome
|
||||
|
@ -18,6 +19,7 @@
|
|||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
|
@ -1303,6 +1305,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
|||
'upload_date': '20211127',
|
||||
},
|
||||
'playlist_mincount': 513,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
|
||||
'info_dict': {
|
||||
'id': 'BV1DU4y1r7tz',
|
||||
'ext': 'mp4',
|
||||
'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
|
||||
'upload_date': '20220820',
|
||||
'description': '',
|
||||
'timestamp': 1661016330,
|
||||
'uploader_id': '1958703906',
|
||||
'uploader': '靡烟miya',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'duration': 9552.903,
|
||||
'tags': list,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'_old_archive_ids': ['bilibili 687146339_part1'],
|
||||
},
|
||||
'params': {'noplaylist': True},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
|
||||
'info_dict': {
|
||||
|
@ -1354,6 +1376,11 @@ def _extract_medialist(self, query, list_id):
|
|||
|
||||
def _real_extract(self, url):
|
||||
list_id = self._match_id(url)
|
||||
|
||||
bvid = traverse_obj(parse_qs(url), ('bvid', 0))
|
||||
if not self._yes_playlist(list_id, bvid):
|
||||
return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
|
||||
|
||||
webpage = self._download_webpage(url, list_id)
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
|
||||
if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
|
||||
|
@ -1463,8 +1490,37 @@ class BiliBiliSearchIE(SearchInfoExtractor):
|
|||
IE_DESC = 'Bilibili video search'
|
||||
_MAX_RESULTS = 100000
|
||||
_SEARCH_KEY = 'bilisearch'
|
||||
_TESTS = [{
|
||||
'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
'playlist_count': 3,
|
||||
'info_dict': {
|
||||
'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1n44y1Q7sc',
|
||||
'ext': 'mp4',
|
||||
'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
|
||||
'timestamp': 1669889987,
|
||||
'upload_date': '20221201',
|
||||
'description': 'md5:43343c0973defff527b5a4b403b4abf9',
|
||||
'tags': list,
|
||||
'uploader': '靡烟miya',
|
||||
'duration': 123.156,
|
||||
'uploader_id': '1958703906',
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'_old_archive_ids': ['bilibili 988222410_part1'],
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _search_results(self, query):
|
||||
if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
|
||||
self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
|
||||
for page_num in itertools.count(1):
|
||||
videos = self._download_json(
|
||||
'https://api.bilibili.com/x/web-interface/search/type', query,
|
||||
|
@ -1621,6 +1677,7 @@ def _real_extract(self, url):
|
|||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
|
||||
|
||||
def _call_api(self, endpoint, *args, **kwargs):
|
||||
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
|
||||
|
@ -1658,19 +1715,34 @@ def _get_subtitles(self, *, ep_id=None, aid=None):
|
|||
'aid': aid,
|
||||
})) or {}
|
||||
subtitles = {}
|
||||
for sub in sub_json.get('subtitles') or []:
|
||||
sub_url = sub.get('url')
|
||||
if not sub_url:
|
||||
continue
|
||||
sub_data = self._download_json(
|
||||
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||
if not sub_data:
|
||||
continue
|
||||
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data)
|
||||
})
|
||||
fetched_urls = set()
|
||||
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
|
||||
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
|
||||
if url in fetched_urls:
|
||||
continue
|
||||
fetched_urls.add(url)
|
||||
sub_ext = determine_ext(url)
|
||||
sub_lang = sub.get('lang_key') or 'en'
|
||||
|
||||
if sub_ext == 'ass':
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'ass',
|
||||
'url': url,
|
||||
})
|
||||
elif sub_ext == 'json':
|
||||
sub_data = self._download_json(
|
||||
url, ep_id or aid, fatal=False,
|
||||
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
|
||||
errnote='Unable to download subtitles')
|
||||
|
||||
if sub_data:
|
||||
subtitles.setdefault(sub_lang, []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(sub_data),
|
||||
})
|
||||
else:
|
||||
self.report_warning('Unexpected subtitle extension', ep_id or aid)
|
||||
|
||||
return subtitles
|
||||
|
||||
def _get_formats(self, *, ep_id=None, aid=None):
|
||||
|
@ -1716,7 +1788,9 @@ def _get_formats(self, *, ep_id=None, aid=None):
|
|||
def _parse_video_metadata(self, video_data):
    """Build title, description, thumbnail, timestamp and episode number from ``video_data``."""
    display_title = video_data.get('title_display')
    # The episode number is taken from a leading "E<digits>" in the display title
    episode_number = self._search_regex(
        r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)

    return {
        'title': display_title or video_data.get('title'),
        'description': video_data.get('desc'),
        'thumbnail': video_data.get('cover'),
        'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
        'episode_number': int_or_none(episode_number),
    }
|
||||
|
@ -1813,17 +1887,6 @@ class BiliIntlIE(BiliIntlBaseIE):
|
|||
'episode_number': 140,
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||
}, {
|
||||
'url': 'https://www.bilibili.tv/en/video/2041863208',
|
||||
'info_dict': {
|
||||
'id': '2041863208',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1670874843,
|
||||
'description': 'Scheduled for April 2023.\nStudio: ufotable',
|
||||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||
'upload_date': '20221212',
|
||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||
},
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
|
@ -1864,9 +1927,9 @@ class BiliIntlIE(BiliIntlBaseIE):
|
|||
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||
'timestamp': 1667891924,
|
||||
'upload_date': '20221108',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
|
||||
'comment_count': int,
|
||||
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
|
@ -1929,10 +1992,12 @@ def _extract_video_metadata(self, url, video_id, season_id):
|
|||
|
||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||
return merge_dicts(
|
||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||
'title': self._html_search_meta('og:title', webpage),
|
||||
'description': self._html_search_meta('og:description', webpage)
|
||||
})
|
||||
self._parse_video_metadata(video_data), {
|
||||
'title': get_element_by_class(
|
||||
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
|
||||
'description': get_element_by_class(
|
||||
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
|
||||
}, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
|
||||
comment_api_raw_data = self._download_json(
|
||||
|
@ -2020,7 +2085,8 @@ def _real_extract(self, url):
|
|||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||
'chapters': chapters,
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||
'__post_extractor': self.extract_comments(video_id, ep_id),
|
||||
'http_headers': self._HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
|
209
yt_dlp/extractor/boosty.py
Normal file
209
yt_dlp/extractor/boosty.py
Normal file
|
@ -0,0 +1,209 @@
|
|||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoostyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?boosty\.to/(?P<user>[^/#?]+)/posts/(?P<post_id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
# single ok_video
|
||||
'url': 'https://boosty.to/kuplinov/posts/e55d050c-e3bb-4873-a7db-ac7a49b40c38',
|
||||
'info_dict': {
|
||||
'id': 'd7473824-352e-48e2-ae53-d4aa39459968',
|
||||
'title': 'phasma_3',
|
||||
'channel': 'Kuplinov',
|
||||
'channel_id': '7958701',
|
||||
'timestamp': 1655031975,
|
||||
'upload_date': '20220612',
|
||||
'release_timestamp': 1655049000,
|
||||
'release_date': '20220612',
|
||||
'modified_timestamp': 1668680993,
|
||||
'modified_date': '20221117',
|
||||
'tags': ['куплинов', 'phasmophobia'],
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 105,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}, {
|
||||
# multiple ok_video
|
||||
'url': 'https://boosty.to/maddyson/posts/0c652798-3b35-471f-8b48-a76a0b28736f',
|
||||
'info_dict': {
|
||||
'id': '0c652798-3b35-471f-8b48-a76a0b28736f',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'cc325a9f-a563-41c6-bf47-516c1b506c9a',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 31204,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'd07b0a72-9493-4512-b54e-55ce468fd4b7',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 25704,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '4a3bba32-78c8-422a-9432-2791aff60b42',
|
||||
'title': 'то что не пропустил юта6',
|
||||
'channel': 'Илья Давыдов',
|
||||
'channel_id': '6808257',
|
||||
'timestamp': 1694017040,
|
||||
'upload_date': '20230906',
|
||||
'release_timestamp': 1694017040,
|
||||
'release_date': '20230906',
|
||||
'modified_timestamp': 1694071178,
|
||||
'modified_date': '20230907',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 31867,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.mycdn\.me/videoPreview\?',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
# single external video (youtube)
|
||||
'url': 'https://boosty.to/denischuzhoy/posts/6094a487-bcec-4cf8-a453-43313b463c38',
|
||||
'info_dict': {
|
||||
'id': 'EXelTnve5lY',
|
||||
'title': 'Послание Президента Федеральному Собранию | Класс народа',
|
||||
'upload_date': '20210425',
|
||||
'channel': 'Денис Чужой',
|
||||
'tags': 'count:10',
|
||||
'like_count': int,
|
||||
'ext': 'mp4',
|
||||
'duration': 816,
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https://i\.ytimg\.com/',
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'categories': list,
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UCCzVNbWZfYpBfyofCCUD_0w',
|
||||
'channel_is_verified': bool,
|
||||
'channel_url': r're:^https://www\.youtube\.com/',
|
||||
'comment_count': int,
|
||||
'description': str,
|
||||
'heatmap': 'count:100',
|
||||
'live_status': str,
|
||||
'playable_in_embed': bool,
|
||||
'uploader': str,
|
||||
'uploader_id': str,
|
||||
'uploader_url': r're:^https://www\.youtube\.com/',
|
||||
},
|
||||
}]
|
||||
|
||||
_MP4_TYPES = ('tiny', 'lowest', 'low', 'medium', 'high', 'full_hd', 'quad_hd', 'ultra_hd')
|
||||
|
||||
def _extract_formats(self, player_urls, video_id):
    """Turn a list of player URL objects into a list of format dicts.

    Entries without a valid ``url`` are skipped. HLS and DASH entries are
    expanded through their manifests (non-fatal); entries whose type is in
    ``_MP4_TYPES`` become direct mp4 formats ranked by their position in
    that tuple. Any other type only produces a warning.
    """
    hls_types = ('hls', 'hls_live', 'live_ondemand_hls', 'live_playback_hls')
    dash_types = ('dash', 'dash_live', 'live_playback_dash')
    rank = qualities(self._MP4_TYPES)

    collected = []
    for entry in traverse_obj(player_urls, lambda _, v: url_or_none(v['url'])):
        media_url = entry['url']
        kind = entry.get('type')

        if kind in hls_types:
            collected.extend(self._extract_m3u8_formats(
                media_url, video_id, m3u8_id='hls', fatal=False))
            continue
        if kind in dash_types:
            collected.extend(self._extract_mpd_formats(
                media_url, video_id, mpd_id='dash', fatal=False))
            continue
        if kind in self._MP4_TYPES:
            collected.append({
                'url': media_url,
                'ext': 'mp4',
                'format_id': kind,
                'quality': rank(kind),
            })
            continue

        self.report_warning(f'Unknown format type: {kind!r}')

    return collected
|
||||
|
||||
def _real_extract(self, url):
    """Extract the videos attached to a Boosty post.

    Returns the single entry when the post has exactly one video, a
    playlist when it has several, and raises ExtractorError when none are
    found.
    """
    mobj = self._match_valid_url(url)
    user, post_id = mobj.group('user'), mobj.group('post_id')
    post = self._download_json(
        f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
        note='Downloading post data', errnote='Unable to download post data')

    post_title = post.get('title')
    if not post_title:
        # The API gave no title: fall back to the HTML page
        self.report_warning('Unable to extract post title. Falling back to parsing html page')
        webpage = self._download_webpage(url, video_id=post_id)
        post_title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

    post_fields = traverse_obj(post, {
        'channel': ('user', 'name', {str}),
        'channel_id': ('user', 'id', {str_or_none}),
        'timestamp': ('createdAt', {int_or_none}),
        'release_timestamp': ('publishTime', {int_or_none}),
        'modified_timestamp': ('updatedAt', {int_or_none}),
        'tags': ('tags', ..., 'title', {str}),
        'like_count': ('count', 'likes', {int_or_none}),
    })
    common_metadata = {'title': post_title, **post_fields}

    entries = []
    for item in traverse_obj(post, ('data', ..., {dict})):
        item_type = item.get('type')
        if item_type == 'ok_video':
            video_id = item.get('id') or post_id
            formats = self._extract_formats(item.get('playerUrls'), video_id)
            # Item-level fields take precedence over the post-level ones
            item_fields = traverse_obj(item, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'view_count': ('viewsCounter', {int_or_none}),
                'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
            }, get_all=False)
            entries.append({
                'id': video_id,
                'formats': formats,
                **common_metadata,
                **item_fields,
            })
        elif item_type == 'video' and url_or_none(item.get('url')):
            # External videos are delegated to the YouTube extractor
            entries.append(self.url_result(item['url'], YoutubeIE))

    if len(entries) > 1:
        return self.playlist_result(entries, post_id, post_title, **common_metadata)
    if entries:
        return entries[0]
    raise ExtractorError('No videos found', expected=True)
|
|
@ -1,6 +1,7 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_resolution,
|
||||
|
@ -60,6 +61,7 @@ def _real_extract(self, url):
|
|||
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
|
||||
'media': media_type,
|
||||
'idint': media_id,
|
||||
'format': 'dm',
|
||||
})
|
||||
|
||||
formats = []
|
||||
|
@ -69,6 +71,10 @@ def _real_extract(self, url):
|
|||
format_url = url_or_none(format_.get('file'))
|
||||
if not format_url:
|
||||
continue
|
||||
if determine_ext(format_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
format_url, media_id, mpd_id='dash', fatal=False))
|
||||
continue
|
||||
label = format_.get('label')
|
||||
f = parse_resolution(label)
|
||||
f.update({
|
||||
|
|
139
yt_dlp/extractor/chzzk.py
Normal file
139
yt_dlp/extractor/chzzk.py
Normal file
|
@ -0,0 +1,139 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CHZZKLiveIE(InfoExtractor):
|
||||
IE_NAME = 'chzzk:live'
|
||||
_VALID_URL = r'https?://chzzk\.naver\.com/live/(?P<id>[\da-f]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://chzzk.naver.com/live/c68b8ef525fb3d2fa146344d84991753',
|
||||
'info_dict': {
|
||||
'id': 'c68b8ef525fb3d2fa146344d84991753',
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'channel': '진짜도현',
|
||||
'channel_id': 'c68b8ef525fb3d2fa146344d84991753',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'timestamp': 1705510344,
|
||||
'upload_date': '20240117',
|
||||
'live_status': 'is_live',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
},
|
||||
'skip': 'The channel is not currently live',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract the running live stream of a channel.

    Raises ExtractorError (expected) when the live status is ``CLOSE``.
    """
    channel_id = self._match_id(url)
    live_detail = self._download_json(
        f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id,
        note='Downloading channel info', errnote='Unable to download channel info')['content']

    if live_detail.get('status') == 'CLOSE':
        raise ExtractorError('The channel is not currently live', expected=True)

    # The playback description arrives as a JSON string inside the JSON response
    live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

    thumbnail_template = traverse_obj(
        live_playback, ('thumbnail', 'snapshotThumbnailTemplate', {url_or_none}))
    thumbnails = []
    if thumbnail_template and '{type}' in thumbnail_template:
        # One thumbnail per listed type; the type string is also used as the width
        thumbnails = [{
            'id': width,
            'url': thumbnail_template.replace('{type}', width),
            'width': int_or_none(width),
        } for width in traverse_obj(live_playback, ('thumbnail', 'types', ..., {str}))]

    formats, subtitles = [], {}
    for media in traverse_obj(live_playback, ('media', lambda _, v: url_or_none(v['path']))):
        low_latency = media.get('mediaId') == 'LLHLS'
        m3u8_id = 'hls-ll' if low_latency else 'hls'
        fmts, subs = self._extract_m3u8_formats_and_subtitles(
            media['path'], channel_id, 'mp4', fatal=False, live=True, m3u8_id=m3u8_id)
        for fmt in fmts:
            if low_latency:
                fmt['source_preference'] = -2
            if '-afragalow.stream-audio.stream' in fmt['format_id']:
                fmt['quality'] = -2
        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

    detail_fields = traverse_obj(live_detail, {
        'title': ('liveTitle', {str}),
        'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}),
        'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
        'view_count': ('accumulateCount', {int_or_none}),
        'channel': ('channel', 'channelName', {str}),
        'channel_id': ('channel', 'channelId', {str}),
        'channel_is_verified': ('channel', 'verifiedMark', {bool}),
    })

    return {
        'id': channel_id,
        'is_live': True,
        'formats': formats,
        'subtitles': subtitles,
        'thumbnails': thumbnails,
        **detail_fields,
    }
|
||||
|
||||
|
||||
class CHZZKVideoIE(InfoExtractor):
|
||||
IE_NAME = 'chzzk:video'
|
||||
_VALID_URL = r'https?://chzzk\.naver\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://chzzk.naver.com/video/1754',
|
||||
'md5': 'b0c0c1bb888d913b93d702b1512c7f06',
|
||||
'info_dict': {
|
||||
'id': '1754',
|
||||
'ext': 'mp4',
|
||||
'title': '치지직 테스트 방송',
|
||||
'channel': '침착맨',
|
||||
'channel_id': 'bb382c2c0cc9fa7c86ab3b037fb5799c',
|
||||
'channel_is_verified': False,
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 15577,
|
||||
'timestamp': 1702970505.417,
|
||||
'upload_date': '20231219',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a recorded video: metadata from the chzzk API, DASH formats from the playback API."""
    video_id = self._match_id(url)
    video_meta = self._download_json(
        f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id,
        note='Downloading video info', errnote='Unable to download video info')['content']

    # The playback endpoint is keyed by the API's own videoId plus an inKey
    playback_url = f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}'
    playback_query = {
        'key': video_meta['inKey'],
        'env': 'real',
        'lc': 'en_US',
        'cpl': 'en_US',
    }
    formats, subtitles = self._extract_mpd_formats_and_subtitles(
        playback_url, video_id, query=playback_query,
        note='Downloading video playback', errnote='Unable to download video playback')

    meta_fields = traverse_obj(video_meta, {
        'title': ('videoTitle', {str}),
        'thumbnail': ('thumbnailImageUrl', {url_or_none}),
        'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}),
        'view_count': ('readCount', {int_or_none}),
        'duration': ('duration', {int_or_none}),
        'channel': ('channel', 'channelName', {str}),
        'channel_id': ('channel', 'channelId', {str}),
        'channel_is_verified': ('channel', 'verifiedMark', {bool}),
    })

    return {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        **meta_fields,
    }
|
|
@ -67,7 +67,10 @@ def _real_extract(self, url):
|
|||
html = self._download_webpage(url, video_id)
|
||||
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
||||
|
||||
if idetails.get('err_code') == 1200:
|
||||
err_code = idetails.get('err_code')
|
||||
if err_code == 1002:
|
||||
self.raise_login_required()
|
||||
elif err_code == 1200:
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available from your location due to geo restriction. '
|
||||
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
||||
|
|
|
@ -46,15 +46,18 @@ def _real_extract(self, url):
|
|||
video_id.split('.')[1] + '==='), video_id)['sub']
|
||||
manifest_base_url = base_url + 'manifest/video.'
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
manifest_base_url + 'm3u8', video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
|
79
yt_dlp/extractor/cloudycdn.py
Normal file
79
yt_dlp/extractor/cloudycdn.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CloudyCDNIE(InfoExtractor):
    """Extractor for ``embed.cloudycdn.services`` players, direct or embedded via iframe."""

    _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        },
    }, {
        'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
        'md5': '798828a479151e2444d8dcfbec76e482',
        'info_dict': {
            'id': '26e_lv-8-5-1',
            'ext': 'mp4',
            'title': 'LV-8-5-1',
            'timestamp': 1669767167,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
            'duration': 1205,
            'upload_date': '20221130',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
        'md5': '63074e8e6c84ac2a01f2fb8bf03b8f43',
        'info_dict': {
            'id': 'cqd_lib-2',
            'ext': 'mp4',
            'upload_date': '20230223',
            'duration': 629,
            'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
            'timestamp': 1677181513,
            'title': 'LIB-2',
        },
    }]

    def _real_extract(self, url):
        """POST to the player endpoint and collect HLS formats from every listed source."""
        match = self._match_valid_url(url)
        site_id, video_id = match.group('site_id', 'id')

        # The player endpoint is queried with a form-encoded body carrying the embedding URL
        player_url = f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/'
        form_body = urlencode_postdata({
            'version': '6.4.0',
            'referer': url,
        })
        data = self._download_json(player_url, video_id, data=form_body)

        formats, subtitles = [], {}
        source_urls = traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none}))
        for source_url in source_urls:
            # Non-fatal: one broken source must not discard the others
            source_formats, source_subtitles = self._extract_m3u8_formats_and_subtitles(
                source_url, video_id, fatal=False)
            formats.extend(source_formats)
            self._merge_subtitles(source_subtitles, target=subtitles)

        metadata = traverse_obj(data, {
            'title': ('name', {str}),
            'duration': ('duration', {int_or_none}),
            'timestamp': ('upload_date', {parse_iso8601}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }
|
|
@ -247,6 +247,8 @@ class InfoExtractor:
|
|||
(For internal use only)
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
|
||||
rtmp_protocol, rtmp_real_time
|
||||
|
@ -278,7 +280,7 @@ class InfoExtractor:
|
|||
description: Full video description.
|
||||
uploader: Full name of the video uploader.
|
||||
license: License name the video is licensed under.
|
||||
creator: The creator of the video.
|
||||
creators: List of creators of the video.
|
||||
timestamp: UNIX timestamp of the moment the video was uploaded
|
||||
upload_date: Video upload date in UTC (YYYYMMDD).
|
||||
If not explicitly set, calculated from timestamp
|
||||
|
@ -432,14 +434,6 @@ class InfoExtractor:
|
|||
Useful for splits and compilations.
|
||||
disc_number: Number of the disc or other physical medium the track belongs to,
|
||||
as an integer.
|
||||
composer: Deprecated; use "composers" instead.
|
||||
Composer(s) of the piece, comma-separated.
|
||||
artist: Deprecated; use "artists" instead.
|
||||
Artist(s) of the track, comma-separated.
|
||||
genre: Deprecated; use "genres" instead.
|
||||
Genre(s) of the track, comma-separated.
|
||||
album_artist: Deprecated; use "album_artists" instead.
|
||||
All artists appeared on the album, comma-separated.
|
||||
|
||||
The following fields should only be set for clips that should be cut from the original video:
|
||||
|
||||
|
@ -450,6 +444,18 @@ class InfoExtractor:
|
|||
rows: Number of rows in each storyboard fragment, as an integer
|
||||
columns: Number of columns in each storyboard fragment, as an integer
|
||||
|
||||
The following fields are deprecated and should not be set by new code:
|
||||
composer: Use "composers" instead.
|
||||
Composer(s) of the piece, comma-separated.
|
||||
artist: Use "artists" instead.
|
||||
Artist(s) of the track, comma-separated.
|
||||
genre: Use "genres" instead.
|
||||
Genre(s) of the track, comma-separated.
|
||||
album_artist: Use "album_artists" instead.
|
||||
All artists appeared on the album, comma-separated.
|
||||
creator: Use "creators" instead.
|
||||
The creator of the video.
|
||||
|
||||
Unless mentioned otherwise, the fields should be Unicode strings.
|
||||
|
||||
Unless mentioned otherwise, None is equivalent to absence of information.
|
||||
|
@ -2538,7 +2544,11 @@ def _extract_mpd_formats(self, *args, **kwargs):
|
|||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _extract_mpd_formats_and_subtitles(
|
||||
def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
|
||||
periods = self._extract_mpd_periods(*args, **kwargs)
|
||||
return self._merge_mpd_periods(periods)
|
||||
|
||||
def _extract_mpd_periods(
|
||||
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
|
||||
fatal=True, data=None, headers={}, query={}):
|
||||
|
||||
|
@ -2551,17 +2561,16 @@ def _extract_mpd_formats_and_subtitles(
|
|||
errnote='Failed to download MPD manifest' if errnote is None else errnote,
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
if res is False:
|
||||
return [], {}
|
||||
return []
|
||||
mpd_doc, urlh = res
|
||||
if mpd_doc is None:
|
||||
return [], {}
|
||||
return []
|
||||
|
||||
# We could have been redirected to a new url when we retrieved our mpd file.
|
||||
mpd_url = urlh.url
|
||||
mpd_base_url = base_url(mpd_url)
|
||||
|
||||
return self._parse_mpd_formats_and_subtitles(
|
||||
mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
|
||||
|
||||
def _parse_mpd_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
|
||||
|
@ -2569,8 +2578,39 @@ def _parse_mpd_formats(self, *args, **kwargs):
|
|||
self._report_ignoring_subs('DASH')
|
||||
return fmts
|
||||
|
||||
def _parse_mpd_formats_and_subtitles(
|
||||
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
|
||||
periods = self._parse_mpd_periods(*args, **kwargs)
|
||||
return self._merge_mpd_periods(periods)
|
||||
|
||||
def _merge_mpd_periods(self, periods):
|
||||
"""
|
||||
Combine all formats and subtitles from an MPD manifest into a single list,
|
||||
by concatenate streams with similar formats.
|
||||
"""
|
||||
formats, subtitles = {}, {}
|
||||
for period in periods:
|
||||
for f in period['formats']:
|
||||
assert 'is_dash_periods' not in f, 'format already processed'
|
||||
f['is_dash_periods'] = True
|
||||
format_key = tuple(v for k, v in f.items() if k not in (
|
||||
('format_id', 'fragments', 'manifest_stream_number')))
|
||||
if format_key not in formats:
|
||||
formats[format_key] = f
|
||||
elif 'fragments' in f:
|
||||
formats[format_key].setdefault('fragments', []).extend(f['fragments'])
|
||||
|
||||
if subtitles and period['subtitles']:
|
||||
self.report_warning(bug_reports_message(
|
||||
'Found subtitles in multiple periods in the DASH manifest; '
|
||||
'if part of the subtitles are missing,'
|
||||
), only_once=True)
|
||||
|
||||
for sub_lang, sub_info in period['subtitles'].items():
|
||||
subtitles.setdefault(sub_lang, []).extend(sub_info)
|
||||
|
||||
return list(formats.values()), subtitles
|
||||
|
||||
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
|
||||
"""
|
||||
Parse formats from MPD manifest.
|
||||
References:
|
||||
|
@ -2649,9 +2689,13 @@ def extract_Initialization(source):
|
|||
return ms_info
|
||||
|
||||
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
|
||||
formats, subtitles = [], {}
|
||||
stream_numbers = collections.defaultdict(int)
|
||||
for period in mpd_doc.findall(_add_ns('Period')):
|
||||
for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
|
||||
period_entry = {
|
||||
'id': period.get('id', f'period-{period_idx}'),
|
||||
'formats': [],
|
||||
'subtitles': collections.defaultdict(list),
|
||||
}
|
||||
period_duration = parse_duration(period.get('duration')) or mpd_duration
|
||||
period_ms_info = extract_multisegment_info(period, {
|
||||
'start_number': 1,
|
||||
|
@ -2901,11 +2945,10 @@ def add_segment_url():
|
|||
if content_type in ('video', 'audio', 'image/jpeg'):
|
||||
f['manifest_stream_number'] = stream_numbers[f['url']]
|
||||
stream_numbers[f['url']] += 1
|
||||
formats.append(f)
|
||||
period_entry['formats'].append(f)
|
||||
elif content_type == 'text':
|
||||
subtitles.setdefault(lang or 'und', []).append(f)
|
||||
|
||||
return formats, subtitles
|
||||
period_entry['subtitles'][lang or 'und'].append(f)
|
||||
yield period_entry
|
||||
|
||||
def _extract_ism_formats(self, *args, **kwargs):
|
||||
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
|
||||
|
|
|
@ -33,10 +33,7 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(
|
||||
'http://embed.crooksandliars.com/embed/%s' % video_id, video_id)
|
||||
|
||||
manifest = self._parse_json(
|
||||
self._search_regex(
|
||||
r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'),
|
||||
video_id)
|
||||
manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id)
|
||||
|
||||
quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high'))
|
||||
|
||||
|
|
72
yt_dlp/extractor/elementorembed.py
Normal file
72
yt_dlp/extractor/elementorembed.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import unescapeHTML, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ElementorEmbedIE(InfoExtractor):
    """Webpage-only extractor for Elementor video and video-playlist widgets."""

    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://capitaltv.cy/2023/12/14/υγεια-και-ζωη-14-12-2023-δρ-ξενια-κωσταντινιδο/',
        'info_dict': {
            'id': 'KgzuxwuQwM4',
            'ext': 'mp4',
            'title': 'ΥΓΕΙΑ ΚΑΙ ΖΩΗ 14 12 2023 ΔΡ ΞΕΝΙΑ ΚΩΣΤΑΝΤΙΝΙΔΟΥ',
            'thumbnail': 'https://i.ytimg.com/vi/KgzuxwuQwM4/maxresdefault.jpg',
            'playable_in_embed': True,
            'tags': 'count:16',
            'like_count': int,
            'channel': 'Capital TV Cyprus',
            'channel_id': 'UCR8LwVKTLGEXt4ZAErpCMrg',
            'availability': 'public',
            'description': 'md5:7a3308a22881aea4612358c4ba121f77',
            'duration': 2891,
            'upload_date': '20231214',
            'uploader_id': '@capitaltvcyprus6389',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCR8LwVKTLGEXt4ZAErpCMrg',
            'uploader_url': 'https://www.youtube.com/@capitaltvcyprus6389',
            'uploader': 'Capital TV Cyprus',
            'age_limit': 0,
            'categories': ['News & Politics'],
            'view_count': int,
            'channel_follower_count': int,
        },
    }, {
        'url': 'https://elementor.com/academy/theme-builder-collection/?playlist=76011151&video=9e59909',
        'info_dict': {
            'id': '?playlist=76011151&video=9e59909',
            'title': 'Theme Builder Collection - Academy',
            'age_limit': 0,
            'timestamp': 1702196984.0,
            'upload_date': '20231210',
            'description': 'md5:7f52c52715ee9e54fd7f82210511673d',
            'thumbnail': 'https://elementor.com/academy/wp-content/uploads/2021/07/Theme-Builder-1.png',
        },
        'playlist_count': 11,
        'params': {
            'skip_download': True,
        },
    }]
    _WIDGET_REGEX = r'<div[^>]+class="[^"]*elementor-widget-video(?:-playlist)?[^"]*"[^>]*data-settings="([^"]*)"'

    def _extract_from_webpage(self, url, webpage):
        """Yield one result per video referenced in the page's widget ``data-settings``."""
        for raw_settings in re.findall(self._WIDGET_REGEX, webpage):
            # The attribute value is HTML-escaped JSON; unparsable widgets are tolerated
            settings = self._parse_json(
                raw_settings, None, fatal=False, transform_source=unescapeHTML)

            # Single-video widget: the URL sits at the top level of the settings
            widget_youtube_url = traverse_obj(settings, ('youtube_url', {url_or_none}))
            if widget_youtube_url:
                yield self.url_result(widget_youtube_url, ie=YoutubeIE)

            # Playlist widget: one entry per tab that has a truthy '_id'
            playlist_tabs = traverse_obj(settings, ('tabs', lambda _, v: v['_id'], {dict}))
            for tab in playlist_tabs:
                tab_youtube_url = traverse_obj(tab, ('youtube_url', {url_or_none}))
                if tab_youtube_url:
                    yield self.url_result(tab_youtube_url, ie=YoutubeIE)

                tab_vimeo_url = traverse_obj(tab, ('vimeo_url', {url_or_none}))
                if tab_vimeo_url:
                    yield self.url_result(tab_vimeo_url, ie=VimeoIE)

                media_urls = traverse_obj(
                    tab, (('hosted_url', 'external_url'), 'url', {url_or_none}))
                for media_url in media_urls:
                    yield {
                        'id': tab['_id'],
                        'url': media_url,
                        'title': tab.get('title'),
                    }
|
|
@ -1,8 +1,10 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
encode_base_n,
|
||||
get_elements_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
|
@ -81,6 +83,7 @@ def calc_hash(s):
|
|||
sources = video['sources']
|
||||
|
||||
formats = []
|
||||
has_av1 = bool(get_elements_by_class('download-av1', webpage))
|
||||
for kind, formats_dict in sources.items():
|
||||
if not isinstance(formats_dict, dict):
|
||||
continue
|
||||
|
@ -106,6 +109,14 @@ def calc_hash(s):
|
|||
'height': height,
|
||||
'fps': fps,
|
||||
})
|
||||
if has_av1:
|
||||
formats.append({
|
||||
'url': src.replace('.mp4', '-av1.mp4'),
|
||||
'format_id': join_nonempty('av1', format_id),
|
||||
'height': height,
|
||||
'fps': fps,
|
||||
'vcodec': 'av1',
|
||||
})
|
||||
|
||||
json_ld = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
|
|
224
yt_dlp/extractor/err.py
Normal file
224
yt_dlp/extractor/err.py
Normal file
|
@ -0,0 +1,224 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ERRJupiterIE(InfoExtractor):
    """Extractor for the jupiter, jupiterpluss and lasteekraan sites on ``err.ee``."""

    _VALID_URL = r'https?://(?:jupiter(?:pluss)?|lasteekraan)\.err\.ee/(?P<id>\d+)'
    _TESTS = [{
        'note': 'Jupiter: Movie: siin-me-oleme',
        'url': 'https://jupiter.err.ee/1211107/siin-me-oleme',
        'md5': '9b45d1682a98853acaa1e1b0c791f425',
        'info_dict': {
            'id': '1211107',
            'ext': 'mp4',
            'title': 'Siin me oleme!',
            'alt_title': '',
            'description': 'md5:1825b795f5f7584241aeb59e5bbb4f70',
            'release_date': '20231226',
            'upload_date': '20201217',
            'modified_date': '20201217',
            'release_timestamp': 1703577600,
            'timestamp': 1608210000,
            'modified_timestamp': 1608220800,
            'release_year': 1978,
        },
    }, {
        'note': 'Jupiter: Series: Impulss',
        'url': 'https://jupiter.err.ee/1609145945/impulss',
        'md5': 'a378486df07ed1ba74e46cc861886243',
        'info_dict': {
            'id': '1609145945',
            'ext': 'mp4',
            'title': 'Impulss',
            'alt_title': 'Loteriipilet hooldekodusse',
            'description': 'md5:fa8a2ed0cdccb130211513443ee4d571',
            'release_date': '20231107',
            'upload_date': '20231026',
            'modified_date': '20231118',
            'release_timestamp': 1699380000,
            'timestamp': 1698327601,
            'modified_timestamp': 1700311802,
            'series': 'Impulss',
            'season': 'Season 1',
            'season_number': 1,
            'episode': 'Loteriipilet hooldekodusse',
            'episode_number': 6,
            'series_id': '1609108187',
            'release_year': 2023,
            'episode_id': '1609145945',
        },
    }, {
        'note': 'Jupiter: Radio Show: mnemoturniir episode',
        'url': 'https://jupiter.err.ee/1037919/mnemoturniir',
        'md5': 'f1eb95fe66f9620ff84e81bbac37076a',
        'info_dict': {
            'id': '1037919',
            'ext': 'm4a',
            'title': 'Mnemoturniir',
            'alt_title': '',
            'description': 'md5:626db52394e7583c26ab74d6a34d9982',
            'release_date': '20240121',
            'upload_date': '20240108',
            'modified_date': '20240121',
            'release_timestamp': 1705827900,
            'timestamp': 1704675602,
            'modified_timestamp': 1705827601,
            'series': 'Mnemoturniir',
            'season': 'Season 0',
            'season_number': 0,
            'episode': 'Episode 0',
            'episode_number': 0,
            'series_id': '1037919',
            'release_year': 2024,
            'episode_id': '1609215101',
        },
    }, {
        'note': 'Jupiter+: Clip: bolee-zelenyj-tallinn',
        'url': 'https://jupiterpluss.err.ee/1609180445/bolee-zelenyj-tallinn',
        'md5': '1b812270c4daf6ce51c06bfeaf33ed95',
        'info_dict': {
            'id': '1609180445',
            'ext': 'mp4',
            'title': 'Более зеленый Таллинн',
            'alt_title': '',
            'description': 'md5:fd34d9bf939c28c4a725b19a7f0d6320',
            'release_date': '20231224',
            'upload_date': '20231130',
            'modified_date': '20231207',
            'release_timestamp': 1703423400,
            'timestamp': 1701338400,
            'modified_timestamp': 1701967200,
            'release_year': 2023,
        },
    }, {
        'note': 'Jupiter+: Series: The Sniffer',
        'url': 'https://jupiterpluss.err.ee/1608311387/njuhach',
        'md5': '2abdeb7131ce551bce49e8d0cea08536',
        'info_dict': {
            'id': '1608311387',
            'ext': 'mp4',
            'title': 'Нюхач',
            'alt_title': '',
            'description': 'md5:8c5c7d8f32ec6e54cd498c9e59ca83bc',
            'release_date': '20230601',
            'upload_date': '20210818',
            'modified_date': '20210903',
            'release_timestamp': 1685633400,
            'timestamp': 1629318000,
            'modified_timestamp': 1630686000,
            'release_year': 2013,
            'episode': 'Episode 1',
            'episode_id': '1608311390',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Нюхач',
            'series_id': '1608311387',
        },
    }, {
        'note': 'Jupiter+: Podcast: lesnye-istorii-aisty',
        'url': 'https://jupiterpluss.err.ee/1608990335/lesnye-istorii-aisty',
        'md5': '8b46d7e4510b254a14b7a52211b5bf96',
        'info_dict': {
            'id': '1608990335',
            'ext': 'm4a',
            'title': 'Лесные истории | Аисты',
            'alt_title': '',
            'description': 'md5:065e721623e271e7a63e6540d409ca6b',
            'release_date': '20230609',
            'upload_date': '20230527',
            'modified_date': '20230608',
            'release_timestamp': 1686308700,
            'timestamp': 1685145600,
            'modified_timestamp': 1686252600,
            'release_year': 2023,
            'episode': 'Episode 0',
            'episode_id': '1608990335',
            'episode_number': 0,
            'season': 'Season 0',
            'season_number': 0,
            'series': 'Лесные истории | Аисты',
            'series_id': '1037497',
        },
    }, {
        'note': 'Lasteekraan: Pätu',
        'url': 'https://lasteekraan.err.ee/1092243/patu',
        'md5': 'a67eb9b9bcb3d201718c15d1638edf77',
        'info_dict': {
            'id': '1092243',
            'ext': 'mp4',
            'title': 'Pätu',
            'alt_title': '',
            'description': 'md5:64a7b5a80afd7042d3f8ec48c77befd9',
            'release_date': '20230614',
            'upload_date': '20200520',
            'modified_date': '20200520',
            'release_timestamp': 1686745800,
            'timestamp': 1589975640,
            'modified_timestamp': 1589975640,
            'release_year': 1990,
            'episode': 'Episode 1',
            'episode_id': '1092243',
            'episode_number': 1,
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Pätu',
            'series_id': '1092236',
        },
    }]

    def _real_extract(self, url):
        """Query the content-page API and build formats from the first media entry."""
        video_id = self._match_id(url)
        page_data = self._download_json(
            'https://services.err.ee/api/v2/vodContent/getContentPageData', video_id,
            query={'contentId': video_id})
        data = page_data['data']['mainContent']

        # Only the first dict under 'medias' is used
        media_data = traverse_obj(data, ('medias', ..., {dict}), get_all=False)
        is_drm_protected = traverse_obj(media_data, ('restrictions', 'drm', {bool}))
        if is_drm_protected:
            self.report_drm(video_id)

        formats, subtitles = [], {}

        # Several source keys may carry the same manifest URL; set() drops the duplicates
        hls_urls = set(traverse_obj(media_data, ('src', ('hls', 'hls2', 'hlsNew'), {url_or_none})))
        for hls_url in hls_urls:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        dash_urls = set(traverse_obj(media_data, ('src', ('dash', 'dashNew'), {url_or_none})))
        for dash_url in dash_urls:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        direct_url = traverse_obj(media_data, ('src', 'file', {url_or_none}))
        if direct_url:
            formats.append({
                'url': direct_url,
                'format_id': 'http',
            })

        # get_all=False: for branching paths the first usable value wins
        common_metadata = traverse_obj(data, {
            'title': ('heading', {str}),
            'alt_title': ('subHeading', {str}),
            'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}),
            'timestamp': ('created', {int_or_none}),
            'modified_timestamp': ('updated', {int_or_none}),
            'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),
            'release_year': ('year', {int_or_none}),
        }, get_all=False)

        # Series/episode fields are only reported for content typed as an episode
        episode_metadata = {}
        if data.get('type') == 'episode':
            episode_metadata = traverse_obj(data, {
                'series': ('heading', {str}),
                'series_id': ('rootContentId', {str_or_none}),
                'episode': ('subHeading', {str}),
                'season_number': ('season', {int_or_none}),
                'episode_number': ('episode', {int_or_none}),
                'episode_id': ('id', {str_or_none}),
            })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **common_metadata,
            **episode_metadata,
        }
|
|
@ -20,6 +20,7 @@
|
|||
get_element_by_id,
|
||||
get_first,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
|
@ -43,6 +44,7 @@ class FacebookIE(InfoExtractor):
|
|||
(?:[^#]*?\#!/)?
|
||||
(?:
|
||||
(?:
|
||||
permalink\.php|
|
||||
video/video\.php|
|
||||
photo\.php|
|
||||
video\.php|
|
||||
|
@ -52,12 +54,13 @@ class FacebookIE(InfoExtractor):
|
|||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?P<id>pfbid[A-Za-z0-9]+|\d+)
|
||||
'''
|
||||
_EMBED_REGEX = [
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
|
||||
|
@ -247,6 +250,41 @@ class FacebookIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 148.435,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
|
||||
'info_dict': {
|
||||
'id': '6968553779868435',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:2f2fcf93e97ac00244fe64521bbdb0cb',
|
||||
'uploader': 'ATTN:',
|
||||
'upload_date': '20231207',
|
||||
'title': 'ATTN:',
|
||||
'duration': 132.675,
|
||||
'uploader_id': '100064451419378',
|
||||
'view_count': int,
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'timestamp': 1701975646,
|
||||
},
|
||||
}, {
|
||||
# data.node.comet_sections.content.story.attachments[].styles.attachment.media
|
||||
'url': 'https://www.facebook.com/permalink.php?story_fbid=pfbid0fqQuVEQyXRa9Dp4RcaTR14KHU3uULHV1EK7eckNXSH63JMuoALsAvVCJ97zAGitil&id=100068861234290',
|
||||
'info_dict': {
|
||||
'id': '270103405756416',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lela Evans',
|
||||
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Lela Evans',
|
||||
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl',
|
||||
'upload_date': '20231228',
|
||||
'timestamp': 1703804085,
|
||||
'duration': 394.347,
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/story.php?story_fbid=pfbid0Fnzhm8UuzjBYpPMNFzaSpFE9UmLdU4fJN8qTANi1Dmtj5q7DNrL5NERXfsAzDEV7l&id=100073071055552',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/video.php?v=10204634152394104',
|
||||
'only_matching': True,
|
||||
|
@ -362,6 +400,18 @@ class FacebookIE(InfoExtractor):
|
|||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'Requires logging in',
|
||||
}, {
|
||||
# data.event.cover_media_renderer.cover_video
|
||||
'url': 'https://m.facebook.com/events/1509582499515440',
|
||||
'info_dict': {
|
||||
'id': '637246984455045',
|
||||
'ext': 'mp4',
|
||||
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
|
||||
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
|
@ -436,38 +486,10 @@ def extract_metadata(webpage):
|
|||
r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage)]
|
||||
post = traverse_obj(post_data, (
|
||||
..., 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
subs_data = traverse_obj(post, (..., 'video', ..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')))
|
||||
is_video_broadcast = get_first(subs_data, 'is_video_broadcast', expected_type=bool)
|
||||
captions = get_first(subs_data, 'video_available_captions_locales', 'captions_url')
|
||||
if url_or_none(captions): # if subs_data only had a 'captions_url'
|
||||
locale = self._html_search_meta(['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
subtitles[locale] = [{'url': captions}]
|
||||
# or else subs_data had 'video_available_captions_locales', a list of dicts
|
||||
for caption in traverse_obj(captions, (
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])}, lambda _, v: v['captions_url'])
|
||||
):
|
||||
lang = caption.get('localized_language') or ''
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_video_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
|
||||
media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: (
|
||||
k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict)
|
||||
title = get_first(media, ('title', 'text'))
|
||||
description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict})) or {})
|
||||
|
||||
page_title = title or self._html_search_regex((
|
||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
|
||||
r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>',
|
||||
|
@ -476,11 +498,16 @@ def extract_metadata(webpage):
|
|||
description = description or self._html_search_meta(
|
||||
['description', 'og:description', 'twitter:description'],
|
||||
webpage, 'description', default=None)
|
||||
uploader_data = (
|
||||
get_first(media, ('owner', {dict}))
|
||||
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
|
||||
or get_first(post, ('node', 'actors', ..., {dict}))
|
||||
or get_first(post, ('event', 'event_creator', {dict})) or {})
|
||||
uploader = uploader_data.get('name') or (
|
||||
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
|
||||
or self._search_regex(
|
||||
(r'ownerName\s*:\s*"([^"]+)"', *self._og_regexes('title')), webpage, 'uploader', fatal=False))
|
||||
|
||||
timestamp = int_or_none(self._search_regex(
|
||||
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
|
||||
'timestamp', default=None))
|
||||
|
@ -502,8 +529,6 @@ def extract_metadata(webpage):
|
|||
webpage, 'view count', default=None)),
|
||||
'concurrent_view_count': get_first(post, (
|
||||
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
info_json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||
|
@ -545,7 +570,11 @@ def process_formats(info):
|
|||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
for f in info['formats']:
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
# with non-browser User-Agent.
|
||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||
|
||||
def extract_relay_data(_filter):
|
||||
return self._parse_json(self._search_regex(
|
||||
|
@ -555,8 +584,8 @@ def extract_relay_data(_filter):
|
|||
def extract_relay_prefetched_data(_filter):
|
||||
return traverse_obj(extract_relay_data(_filter), (
|
||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||
lambda _, v: 'RelayPrefetchedStreamCache' in v, ..., ...,
|
||||
'__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
||||
|
||||
if not video_data:
|
||||
server_js_data = self._parse_json(self._search_regex([
|
||||
|
@ -597,6 +626,29 @@ def parse_graphql_video(video):
|
|||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(video, formats)
|
||||
|
||||
automatic_captions, subtitles = {}, {}
|
||||
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))
|
||||
for caption in traverse_obj(video, (
|
||||
'video_available_captions_locales',
|
||||
{lambda x: sorted(x, key=lambda c: c['locale'])},
|
||||
lambda _, v: url_or_none(v['captions_url'])
|
||||
)):
|
||||
lang = caption.get('localized_language') or 'und'
|
||||
subs = {
|
||||
'url': caption['captions_url'],
|
||||
'name': format_field(caption, 'localized_country', f'{lang} (%s)', default=lang),
|
||||
}
|
||||
if caption.get('localized_creation_method') or is_broadcast:
|
||||
automatic_captions.setdefault(caption['locale'], []).append(subs)
|
||||
else:
|
||||
subtitles.setdefault(caption['locale'], []).append(subs)
|
||||
captions_url = traverse_obj(video, ('captions_url', {url_or_none}))
|
||||
if captions_url and not automatic_captions and not subtitles:
|
||||
locale = self._html_search_meta(
|
||||
['og:locale', 'twitter:locale'], webpage, 'locale', default='en_US')
|
||||
(automatic_captions if is_broadcast else subtitles)[locale] = [{'url': captions_url}]
|
||||
|
||||
info = {
|
||||
'id': v_id,
|
||||
'formats': formats,
|
||||
|
@ -606,6 +658,8 @@ def parse_graphql_video(video):
|
|||
'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
|
||||
'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
|
||||
or float_or_none(video.get('length_in_second'))),
|
||||
'automatic_captions': automatic_captions,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
process_formats(info)
|
||||
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||
|
@ -640,7 +694,8 @@ def parse_attachment(attachment, key='media'):
|
|||
for edge in edges:
|
||||
parse_attachment(edge, key='node')
|
||||
|
||||
video = data.get('video') or {}
|
||||
video = traverse_obj(data, (
|
||||
'event', 'cover_media_renderer', 'cover_video'), 'video', expected_type=dict) or {}
|
||||
if video:
|
||||
attachments = try_get(video, [
|
||||
lambda x: x['story']['attachments'],
|
||||
|
@ -659,6 +714,9 @@ def parse_attachment(attachment, key='media'):
|
|||
# honor precise duration in video info
|
||||
if video_info.get('duration'):
|
||||
webpage_info['duration'] = video_info['duration']
|
||||
# preserve preferred_thumbnail in video info
|
||||
if video_info.get('thumbnail'):
|
||||
webpage_info['thumbnail'] = video_info['thumbnail']
|
||||
return merge_dicts(webpage_info, video_info)
|
||||
|
||||
if not video_data:
|
||||
|
@ -889,3 +947,114 @@ def _real_extract(self, url):
|
|||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
f'https://m.facebook.com/watch/?v={video_id}&_rdr', FacebookIE, video_id)
|
||||
|
||||
|
||||
class FacebookAdsIE(InfoExtractor):
    # Extractor for ads addressed by a numeric `id` query parameter under
    # facebook.com/ads/library (any subdomain, see _VALID_URL)
    _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
    IE_NAME = 'facebook:ads'

    _TESTS = [{
        'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
        'info_dict': {
            'id': '899206155126718',
            'ext': 'mp4',
            'title': 'video by Kandao',
            'uploader': 'Kandao',
            'uploader_id': '774114102743284',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1702548330,
            'thumbnail': r're:^https?://.*',
            'upload_date': '20231214',
            'like_count': int,
        }
    }, {
        'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
        'info_dict': {
            'id': '893637265423481',
            'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
            'uploader': 'Eataly Paris Marais',
            'uploader_id': '2086668958314152',
            'uploader_url': r're:^https?://.*',
            'timestamp': 1703571529,
            'upload_date': '20231226',
            'like_count': int,
        },
        'playlist_count': 3,
    }, {
        'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }, {
        'url': 'https://m.facebook.com/ads/library/?id=901230958115569',
        'only_matching': True,
    }]

    # Snapshot key holding a media URL -> (format_id, format_note).
    # The key order is the ordering handed to qualities() in _extract_formats
    _FORMATS_MAP = {
        'watermarked_video_sd_url': ('sd-wmk', 'SD, watermarked'),
        'video_sd_url': ('sd', None),
        'watermarked_video_hd_url': ('hd-wmk', 'HD, watermarked'),
        'video_hd_url': ('hd', None),
    }

    def _extract_formats(self, video_dict):
        """Return one format dict per _FORMATS_MAP key of `video_dict` that holds a valid URL."""
        # The ranking depends only on _FORMATS_MAP, so build it once rather than once per format
        quality = qualities(tuple(self._FORMATS_MAP))

        formats = []
        for format_key, format_url in traverse_obj(video_dict, (
            {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1])
        )):
            format_id, format_note = self._FORMATS_MAP[format_key]
            formats.append({
                'format_id': format_id,
                'format_note': format_note,
                'url': format_url,
                'ext': 'mp4',
                'quality': quality(format_key),
            })
        return formats

    def _real_extract(self, url):
        """Extract the ad as a single video, or as a playlist when it has several videos.

        Raises ExtractorError when no ad snapshot can be found in the webpage.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The ad snapshot is embedded in JSON blobs passed to `s.handle(...)`
        post_data = [self._parse_json(j, video_id, fatal=False)
                     for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)]
        data = traverse_obj(post_data, (
            ..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False)
        if not data:
            raise ExtractorError('Unable to extract ad data')

        # '{{product.name}}' is an unfilled template placeholder, not a usable title
        title = data.get('title')
        if not title or title == '{{product.name}}':
            title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)

        info_dict = traverse_obj(data, {
            'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}),
            'uploader': ('page_name', {str}),
            'uploader_id': ('page_id', {str_or_none}),
            'uploader_url': ('page_profile_uri', {url_or_none}),
            'timestamp': ('creation_time', {int_or_none}),
            'like_count': ('page_like_count', {int_or_none}),
        })

        entries = []
        # Use .get() in the filter: `v[f]` raises KeyError for an item lacking any single
        # format key, which aborts the predicate before the remaining keys are checked
        for idx, entry in enumerate(traverse_obj(
            data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1
        ):
            entries.append({
                'id': f'{video_id}_{idx}',
                'title': entry.get('title') or title,
                'description': entry.get('link_description') or info_dict.get('description'),
                'thumbnail': url_or_none(entry.get('video_preview_image_url')),
                'formats': self._extract_formats(entry),
            })

        if len(entries) == 1:
            # A lone video is merged into the ad-level metadata
            info_dict.update(entries[0])

        elif len(entries) > 1:
            info_dict.update({
                'title': entries[0]['title'],
                'entries': entries,
                '_type': 'playlist',
            })

        # Set last so that the ad id wins over the `<id>_<idx>` id of a merged single entry
        info_dict['id'] = video_id

        return info_dict
|
||||
|
|
62
yt_dlp/extractor/flextv.py
Normal file
62
yt_dlp/extractor/flextv.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FlexTVIE(InfoExtractor):
    # Extractor for flextv.co.kr channel live pages (/channels/<id>/live)
    _VALID_URL = r'https?://(?:www\.)?flextv\.co\.kr/channels/(?P<id>\d+)/live'
    _TESTS = [{
        'url': 'https://www.flextv.co.kr/channels/231638/live',
        'info_dict': {
            'id': '231638',
            'ext': 'mp4',
            'title': r're:^214하나만\.\.\. ',
            'thumbnail': r're:^https?://.+\.jpg',
            'upload_date': r're:\d{8}',
            'timestamp': int,
            'live_status': 'is_live',
            'channel': 'Hi별',
            'channel_id': '244396',
        },
        'skip': 'The channel is offline',
    }, {
        'url': 'https://www.flextv.co.kr/channels/746/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the channel's stream description and return the live stream info dict.

        Raises UserNotLive when the stream API answers with HTTP 400; any other
        ExtractorError is propagated unchanged.
        """
        channel_id = self._match_id(url)
        api_url = f'https://api.flextv.co.kr/api/channels/{channel_id}/stream'

        try:
            stream_data = self._download_json(api_url, channel_id, query={'option': 'all'})
        except ExtractorError as error:
            is_bad_request = isinstance(error.cause, HTTPError) and error.cause.status == 400
            if not is_bad_request:
                raise
            raise UserNotLive(video_id=channel_id)

        # Only the first listed source is used
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            stream_data['sources'][0]['url'], channel_id, 'mp4')

        metadata = traverse_obj(stream_data, {
            'title': ('stream', 'title', {str}),
            'timestamp': ('stream', 'createdAt', {parse_iso8601}),
            'thumbnail': ('thumbUrl', {url_or_none}),
            'channel': ('owner', 'name', {str}),
            'channel_id': ('owner', 'id', {str_or_none}),
        })

        return {
            'id': channel_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
            **metadata,
        }
|
|
@ -11,6 +11,7 @@
|
|||
join_nonempty,
|
||||
parse_codecs,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
@ -108,6 +109,64 @@ class FloatplaneIE(InfoExtractor):
|
|||
'availability': 'subscriber_only',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.floatplane.com/post/65B5PNoBtf',
|
||||
'info_dict': {
|
||||
'id': '65B5PNoBtf',
|
||||
'description': 'I recorded the inbuilt demo mode for your 90\'s enjoyment, thanks for being Floaties!',
|
||||
'display_id': '65B5PNoBtf',
|
||||
'like_count': int,
|
||||
'release_timestamp': 1701249480,
|
||||
'uploader': 'The Trash Network',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'comment_count': int,
|
||||
'title': 'The $50 electronic drum kit.',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/blogPost_thumbnails/65B5PNoBtf/725555379422705_1701247052743.jpeg',
|
||||
'dislike_count': int,
|
||||
'channel': 'The Drum Thing',
|
||||
'release_date': '20231129',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'ISPJjexylS',
|
||||
'ext': 'mp4',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'The $50 electronic drum kit. .mov',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'thumbnail': 'https://pbs.floatplane.com/video_thumbnails/ISPJjexylS/335202812134041_1701249383392.jpeg',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 622,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'qKfxu6fEpu',
|
||||
'ext': 'aac',
|
||||
'release_date': '20231129',
|
||||
'release_timestamp': 1701249480,
|
||||
'title': 'Roland TD-7 Demo.m4a',
|
||||
'channel_id': '64424fe73cd58cbcf8d8e131',
|
||||
'availability': 'subscriber_only',
|
||||
'uploader': 'The Trash Network',
|
||||
'duration': 114,
|
||||
'channel': 'The Drum Thing',
|
||||
'uploader_id': '61bc20c9a131fb692bf2a513',
|
||||
'channel_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home/thedrumthing',
|
||||
'uploader_url': 'https://www.floatplane.com/channel/TheTrashNetwork/home',
|
||||
},
|
||||
}],
|
||||
'skip': 'requires subscription: "The Trash Network"',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
|
@ -124,6 +183,22 @@ def _real_extract(self, url):
|
|||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
|
@ -150,11 +225,11 @@ def format_path(params):
|
|||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name']))))
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_id': 'name',
|
||||
'format_note': 'label',
|
||||
'format_id': ('name', {str}),
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
|
@ -164,38 +239,28 @@ def format_path(params):
|
|||
})
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname')))
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': 'title',
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'uploader': ('creator', 'title'),
|
||||
'uploader_id': ('creator', 'id'),
|
||||
'channel': ('channel', 'title'),
|
||||
'channel_id': ('channel', 'id'),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnail', 'path'),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': channel_url,
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
|
|
|
@ -1,25 +1,29 @@
|
|||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FunkIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
|
||||
'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
|
||||
'md5': '8610449476156f338761a75391b0017d',
|
||||
'info_dict': {
|
||||
'id': '1155821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
|
||||
'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
|
||||
'description': 'md5:2a03b67596eda0d1b5125c299f45e953',
|
||||
'timestamp': 1514507395,
|
||||
'upload_date': '20171229',
|
||||
'duration': 426.0,
|
||||
'cast': ['United Creators PMB GmbH'],
|
||||
'thumbnail': 'https://assets.nexx.cloud/media/75/56/79/3YKUSJN1LACN0CRxL.jpg',
|
||||
'display_id': 'die-lustigsten-instrumente-aus-dem-internet-teil-2',
|
||||
'alt_title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet Teil 2',
|
||||
'season_number': 0,
|
||||
'season': 'Season 0',
|
||||
'episode_number': 0,
|
||||
'episode': 'Episode 0',
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
|
||||
'only_matching': True,
|
||||
|
@ -27,18 +31,10 @@ class FunkIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
display_id, nexx_id = self._match_valid_url(url).groups()
|
||||
video = self._download_json(
|
||||
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'nexx:741:' + nexx_id,
|
||||
'url': f'nexx:741:{nexx_id}',
|
||||
'ie_key': NexxIE.ie_key(),
|
||||
'id': nexx_id,
|
||||
'title': video.get('title'),
|
||||
'description': video.get('description'),
|
||||
'duration': int_or_none(video.get('duration')),
|
||||
'channel_id': str_or_none(video.get('channelId')),
|
||||
'display_id': display_id,
|
||||
'tags': video.get('tags'),
|
||||
'thumbnail': video.get('imageUrlLandscape'),
|
||||
}
|
||||
|
|
179
yt_dlp/extractor/getcourseru.py
Normal file
179
yt_dlp/extractor/getcourseru.py
Normal file
|
@ -0,0 +1,179 @@
|
|||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GetCourseRuPlayerIE(InfoExtractor):
    # Extractor for the signed player pages on player02.getcourse.ru,
    # which are also detected as <iframe> embeds via _EMBED_REGEX
    _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
    _TESTS = [{
        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
        'info_dict': {
            'id': '513573381',
            'title': '190bdf93f1b29735309853a7a19e24b3',
            'ext': 'mp4',
            'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
            'duration': 1693
        },
        'skip': 'JWT expired',
    }]

    def _real_extract(self, url):
        """Read `window.configs` from the player page and build the info dict from it."""
        # No video id is known before the page is parsed, hence `None`
        player_page = self._download_webpage(url, None, 'Downloading player page')
        configs = self._search_json(r'window\.configs\s*=', player_page, 'config', None)

        video_id = str(configs['gcFileId'])
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            configs['masterPlaylistUrl'], video_id)

        metadata = traverse_obj(configs, {
            'title': ('videoHash', {str}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'duration': ('videoDuration', {int_or_none}),
        })

        return {
            **metadata,
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class GetCourseRuIE(InfoExtractor):
    # Extractor for lesson/landing pages on *.getcourse.ru, *.getcourse.io and the
    # custom domains in _DOMAINS; the embedded players are delegated to GetCourseRuPlayerIE
    _NETRC_MACHINE = 'getcourseru'
    _DOMAINS = [
        'academymel.online',
        'marafon.mani-beauty.com',
        'on.psbook.ru'
    ]
    # Any getcourse.ru/.io subdomain except the player host, or one of the custom domains
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
        # `(?:pl/)?` is an optional non-capturing prefix (was mistyped as `(:?pl/)?`)
        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693
            },
        }]
    }, {
        'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
        'info_dict': {
            'id': '319141781',
            'title': '1. Разминка у стены',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4919601',
                'ext': 'mp4',
                'title': '1. Разминка у стены',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
                'duration': 704
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
        'info_dict': {
            'id': '272499894',
            'title': 'Мотивация к тренировкам',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '447479687',
                'ext': 'mp4',
                'title': 'Мотивация к тренировкам',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
                'duration': 30
            },
        }],
        'skip': 'paid lesson'
    }, {
        'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
        'only_matching': True,
    }]

    _LOGIN_URL_PATH = '/cms/system/login'

    def _login(self, hostname, username, password):
        """Submit the login form of `hostname` with the given credentials.

        Does nothing when a PHPSESSID5 cookie is already present for the host.
        """
        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
            return
        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
        # The login page carries the form id and request signature needed for the POST
        webpage = self._download_webpage(login_url, None)

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
                'xdgetId': self._html_search_regex(
                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
                    webpage, 'xdgetId'),
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
                'params[object_id]': -1,
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
                # The dot is escaped so that only the literal `window.requestSimpleSign` matches
                'requestSimpleSign': self._html_search_regex(
                    r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
            }))

    def _real_extract(self, url):
        """Return a playlist of the GetCourse players embedded in the page.

        Credentials are looked up per hostname. Raises an expected ExtractorError
        when the request ends up on the login page or answers with HTTP 404.
        """
        hostname = urllib.parse.urlparse(url).hostname
        username, password = self._get_login_info(netrc_machine=hostname)
        if username:
            self._login(hostname, username, password)

        display_id = self._match_id(url)
        # NB: 404 is returned due to yt-dlp not properly following redirects #9020
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=404)
        if self._LOGIN_URL_PATH in urlh.url or urlh.status == 404:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)

        # Fall back to the URL-derived id when the page exposes neither variable
        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
        title = self._og_search_title(webpage) or self._html_extract_title(webpage)

        # Each entry is passed the page title through video_kwargs
        return self.playlist_from_matches(
            re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
            playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
                'url_transparent': True,
                'title': title,
            })
|
|
@ -66,7 +66,7 @@ def _entries(self, file_id):
|
|||
query_params = {
|
||||
'contentId': file_id,
|
||||
'token': self._TOKEN,
|
||||
'websiteToken': '7fd94ds12fds4', # From https://gofile.io/dist/js/alljs.js
|
||||
'wt': '4fd6sg89d7s6', # From https://gofile.io/dist/js/alljs.js
|
||||
}
|
||||
password = self.get_param('videopassword')
|
||||
if password:
|
||||
|
|
|
@ -19,9 +19,9 @@ class GoogleDriveIE(InfoExtractor):
|
|||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:docs|drive)\.google\.com/
|
||||
(?:docs|drive|drive\.usercontent)\.google\.com/
|
||||
(?:
|
||||
(?:uc|open)\?.*?id=|
|
||||
(?:uc|open|download)\?.*?id=|
|
||||
file/d/
|
||||
)|
|
||||
video\.google\.com/get_player\?.*?docid=
|
||||
|
@ -53,6 +53,9 @@ class GoogleDriveIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://drive.google.com/uc?id=0B2fjwgkl1A_CX083Tkowdmt6d28',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://drive.usercontent.google.com/download?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_FORMATS_EXT = {
|
||||
'5': 'flv',
|
||||
|
@ -205,9 +208,10 @@ def get_value(key):
|
|||
formats.append(f)
|
||||
|
||||
source_url = update_url_query(
|
||||
'https://drive.google.com/uc', {
|
||||
'https://drive.usercontent.google.com/download', {
|
||||
'id': video_id,
|
||||
'export': 'download',
|
||||
'confirm': 't',
|
||||
})
|
||||
|
||||
def request_source_file(source_url, kind, data=None):
|
||||
|
|
|
@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
|
|||
'title': 'A Family for the Holidays',
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11'
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
}]
|
||||
|
||||
_id_token = None
|
||||
|
@ -77,16 +93,39 @@ def _real_extract(self, url):
|
|||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
video_id, headers={'Authorization': 'Bearer %s' % self._id_token})
|
||||
video_id, headers={
|
||||
'Authorization': 'Bearer %s' % self._id_token,
|
||||
**self.geo_verification_headers(),
|
||||
})
|
||||
|
||||
formats, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
if 'manifestUrls' in api:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
|
||||
|
||||
else:
|
||||
if 'ssai' not in api:
|
||||
raise ExtractorError('expecting Google SSAI stream')
|
||||
|
||||
ssai_content_source_id = api['ssai']['contentSourceID']
|
||||
ssai_video_id = api['ssai']['videoID']
|
||||
|
||||
dai = self._download_json(
|
||||
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
|
||||
video_id, data=b'{"api-key":"null"}',
|
||||
headers={'content-type': 'application/json'})
|
||||
|
||||
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
|
||||
|
||||
# skip pre-roll and mid-roll ads
|
||||
periods = [p for p in periods if '-ad-' not in p['id']]
|
||||
|
||||
formats, subtitles = self._merge_mpd_periods(periods)
|
||||
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
|
|
|
@ -57,8 +57,8 @@ def _real_extract(self, url):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
metadata = self._parse_json(
|
||||
self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id)
|
||||
metadata = self._search_json(
|
||||
r'window\.__reflectData\s*=', webpage, 'metadata', video_id)
|
||||
|
||||
video_info = metadata['collectionMedia'][0]
|
||||
media_data = self._download_json(
|
||||
|
@ -99,7 +99,7 @@ def _real_extract(self, url):
|
|||
'duration': int_or_none(
|
||||
video_info.get('source_duration')),
|
||||
'artist': str_or_none(
|
||||
video_info.get('music_track_artist')),
|
||||
video_info.get('music_track_artist')) or None,
|
||||
'track': str_or_none(
|
||||
video_info.get('music_track_name')),
|
||||
video_info.get('music_track_name')) or None,
|
||||
}
|
||||
|
|
69
yt_dlp/extractor/ilpost.py
Normal file
69
yt_dlp/extractor/ilpost.py
Normal file
|
@ -0,0 +1,69 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class IlPostIE(InfoExtractor):
    # Extractor for podcast episode pages on ilpost.it (/episodes/<slug>)
    _VALID_URL = r'https?://(?:www\.)?ilpost\.it/episodes/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.ilpost.it/episodes/1-avis-akvasas-ka/',
        'md5': '43649f002d85e1c2f319bb478d479c40',
        'info_dict': {
            'id': '2972047',
            'ext': 'mp3',
            'display_id': '1-avis-akvasas-ka',
            'title': '1. Avis akvasas ka',
            'url': 'https://www.ilpost.it/wp-content/uploads/2023/12/28/1703781217-l-invasione-pt1-v6.mp3',
            'timestamp': 1703835014,
            'upload_date': '20231229',
            'duration': 2495.0,
            'availability': 'public',
            'series_id': '235598',
            'description': '',
        }
    }]

    def _real_extract(self, url):
        """Resolve the episode through the page's `ilpostpodcast` endpoint data.

        Raises ExtractorError when the podcast listing has no entry whose id
        matches the page's `post_id`.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page embeds the AJAX endpoint and its parameters in `var ilpostpodcast = {...}`
        endpoint = self._search_json(
            r'var\s+ilpostpodcast\s*=', webpage, 'metadata', display_id)
        episode_id = endpoint['post_id']
        podcast_id = endpoint['podcast_id']

        ajax_url = endpoint['ajax_url']
        form_data = urlencode_postdata({
            'action': 'checkpodcast',
            'cookie': endpoint['cookie'],
            'post_id': episode_id,
            'podcast_id': podcast_id,
        })
        podcast_metadata = self._download_json(ajax_url, display_id, data=form_data)

        def is_requested_episode(_, item):
            # Listing ids are stringified before comparing with the page's post_id
            return str(item['id']) == episode_id

        episode = traverse_obj(
            podcast_metadata,
            ('data', 'postcastList', is_requested_episode, {dict}),
            get_all=False)
        if not episode:
            raise ExtractorError('Episode could not be extracted')

        episode_info = traverse_obj(episode, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'url': ('podcast_raw_url', {url_or_none}),
            'thumbnail': ('image', {url_or_none}),
            'timestamp': ('timestamp', {int_or_none}),
            'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
            'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
        })

        return {
            'id': episode_id,
            'display_id': display_id,
            'series_id': podcast_id,
            'vcodec': 'none',
            **episode_info,
        }
|
|
@ -1,5 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
|
@ -20,39 +21,64 @@ class JioSaavnSongIE(JioSaavnBaseIE):
|
|||
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
|
||||
'md5': '7b1f70de088ede3a152ea34aece4df42',
|
||||
'md5': '3b84396d15ed9e083c3106f1fa589c04',
|
||||
'info_dict': {
|
||||
'id': 'OQsEfQFVUXk',
|
||||
'ext': 'mp3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Leja Re',
|
||||
'album': 'Leja Re',
|
||||
'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
|
||||
'duration': 205,
|
||||
'view_count': int,
|
||||
'release_year': 2018,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_VALID_BITRATES = ('16', '32', '64', '128', '320')
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
|
||||
if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
|
||||
raise ValueError(
|
||||
f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
|
||||
+ f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
|
||||
|
||||
song_data = self._extract_initial_data(url, audio_id)['song']['song']
|
||||
media_data = self._download_json(
|
||||
'https://www.jiosaavn.com/api.php', audio_id, data=urlencode_postdata({
|
||||
'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': '128',
|
||||
'url': song_data['encrypted_media_url'],
|
||||
}))
|
||||
formats = []
|
||||
for bitrate in extract_bitrates:
|
||||
media_data = self._download_json(
|
||||
'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
|
||||
fatal=False, data=urlencode_postdata({
|
||||
'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': bitrate,
|
||||
'url': song_data['encrypted_media_url'],
|
||||
}))
|
||||
if not media_data.get('auth_url'):
|
||||
self.report_warning(f'Unable to extract format info for {bitrate}')
|
||||
continue
|
||||
formats.append({
|
||||
'url': media_data['auth_url'],
|
||||
'ext': media_data.get('type'),
|
||||
'format_id': bitrate,
|
||||
'abr': int(bitrate),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'url': media_data['auth_url'],
|
||||
'ext': media_data.get('type'),
|
||||
'vcodec': 'none',
|
||||
'formats': formats,
|
||||
**traverse_obj(song_data, {
|
||||
'title': ('title', 'text'),
|
||||
'album': ('album', 'text'),
|
||||
'thumbnail': ('image', 0, {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
|
140
yt_dlp/extractor/kukululive.py
Normal file
140
yt_dlp/extractor/kukululive.py
Normal file
|
@ -0,0 +1,140 @@
|
|||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
filter_dict,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
qualities,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class KukuluLiveIE(InfoExtractor):
    """Extractor for ``live.erinn.biz/live.php?h<id>`` pages (live streams and timeshift VODs)."""

    _VALID_URL = r'https?://live\.erinn\.biz/live\.php\?h(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.erinn.biz/live.php?h675134569',
        'md5': 'e380fa6a47fc703d91cea913ab44ec2e',
        'info_dict': {
            'id': '675134569',
            'ext': 'mp4',
            'title': 'プロセカ',
            'description': 'テストも兼ねたプロセカ配信。',
            'timestamp': 1702689148,
            'upload_date': '20231216',
            'thumbnail': r're:^https?://.*',
        },
    }, {
        'url': 'https://live.erinn.biz/live.php?h102338092',
        'md5': 'dcf5167a934b1c60333461e13a81a6e2',
        'info_dict': {
            'id': '102338092',
            'ext': 'mp4',
            'title': 'Among Usで遊びます!!',
            'description': 'VTuberになりましたねんねこ㌨ですよろしくお願いします',
            'timestamp': 1704603118,
            'upload_date': '20240107',
            'thumbnail': r're:^https?://.*',
        },
    }, {
        'url': 'https://live.erinn.biz/live.php?h878049531',
        'only_matching': True,
    }]

    def _get_quality_meta(self, video_id, desc, code, force_h264=None):
        """Download the quality metadata for one variant and return it parsed as a query string.

        *desc* is only used in the progress/error messages; *code* selects the
        ``get<code>liveByAjax`` action. ``force_h264`` is sent only when set
        (``filter_dict`` drops it when it is None).
        """
        label = f'{desc} (force_h264)' if force_h264 else desc
        params = filter_dict({
            'hash': video_id,
            'action': f'get{code}liveByAjax',
            'force_h264': force_h264,
        })
        response = self._download_webpage(
            'https://live.erinn.biz/live.player.fplayer.php', video_id,
            f'Downloading {label} quality metadata', f'Unable to download {label} quality metadata',
            query=params)
        return urllib.parse.parse_qs(response)

    def _add_quality_formats(self, formats, quality_meta):
        """Append the video and audio-only formats described by *quality_meta* to *formats* in place."""
        vcodec = traverse_obj(quality_meta, ('vcodec', 0, {str}))
        quality = traverse_obj(quality_meta, ('now_quality', 0, {str}))
        quality_priority = qualities(('low', 'h264', 'high'))(quality)

        # (metadata key, format_id, ext, vcodec) for each variant the metadata may carry
        variants = (
            ('hlsaddr', quality, 'mp4', vcodec),
            ('hlsaddr_audioonly', join_nonempty(quality, 'audioonly'), 'm4a', 'none'),
        )
        for key, format_id, ext, codec in variants:
            if not traverse_obj(quality_meta, (key, 0, {url_or_none})):
                continue
            formats.append({
                'format_id': format_id,
                'url': quality_meta[key][0],
                'ext': ext,
                'vcodec': codec,
                'quality': quality_priority,
            })

    def _real_extract(self, url):
        """Return a live-stream info dict, a single VOD info dict, or a multi-part VOD playlist.

        Raises ExtractorError (expected) when the page reports that the
        timeshift could not be found.
        """
        video_id = self._match_id(url)
        html = self._download_webpage(url, video_id)

        if '>タイムシフトが見つかりませんでした。<' in html:
            raise ExtractorError('This stream has expired', expected=True)

        # The page writes the tag in upper case; lower it before the element lookup
        lowered_span_html = html.replace('<SPAN', '<span').replace('SPAN>', 'span>')
        title = clean_html(get_element_by_id('livetitle', lowered_span_html))
        description = self._html_search_meta('Description', html)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], html)

        is_live = self._search_regex(
            r'(var\s+timeshift\s*=\s*false)', html, 'is livestream', default=False)
        if is_live:
            formats = []
            for desc, code in (('high', 'Z'), ('low', 'ForceLow')):
                meta = self._get_quality_meta(video_id, desc, code)
                self._add_quality_formats(formats, meta)
                # The high variant is requested a second time with force_h264 when it is HEVC
                if desc == 'high' and traverse_obj(meta, ('vcodec', 0)) == 'HEVC':
                    h264_meta = self._get_quality_meta(video_id, desc, code, force_h264='1')
                    self._add_quality_formats(formats, h264_meta)

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'is_live': True,
                'formats': formats,
            }

        # VOD extraction
        player_html = self._download_webpage(
            'https://live.erinn.biz/live.timeshift.fplayer.php', video_id,
            'Downloading player html', 'Unable to download player html', query={'hash': video_id})

        stream_data = self._search_json(
            r'var\s+fplayer_source\s*=', player_html, 'stream data', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
        sources = traverse_obj(stream_data, lambda _, v: v['file'])

        def build_entry(index, segment, playlist=True):
            """Info dict for one segment; part numbering is applied only in playlist mode."""
            entry_id = f'{video_id}_{index}' if playlist else video_id
            entry_title = f'{title} (Part {index})' if playlist else title
            return {
                'id': entry_id,
                'title': entry_title,
                'description': description,
                'timestamp': traverse_obj(segment, ('time_start', {int_or_none})),
                'thumbnail': thumbnail,
                'formats': [{
                    'url': urljoin('https://live.erinn.biz', segment['file']),
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                }],
            }

        if len(sources) == 1:
            return build_entry(1, sources[0], playlist=False)

        parts = (build_entry(index, segment) for index, segment in enumerate(sources, 1))
        return self.playlist_result(parts, video_id, title, description, multi_video=True)
|
|
@ -13,7 +13,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
|||
|
||||
_TESTS = [{
|
||||
'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
|
||||
'md5': 'e94de44cd80818084352fcf8de1ce82c',
|
||||
'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
|
||||
'info_dict': {
|
||||
'id': 'g9j7Eovo',
|
||||
'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
|
||||
|
@ -26,7 +26,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
|||
},
|
||||
}, {
|
||||
'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
|
||||
'md5': '0b3f10332b812034b3a3eda1ef877c5f',
|
||||
'md5': '319c662943dd777bab835cae1e2d73a5',
|
||||
'info_dict': {
|
||||
'id': 'LeAgybyc',
|
||||
'title': 'Intelligence artificielle : faut-il s’en méfier ?',
|
||||
|
@ -41,7 +41,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
|||
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
|
||||
'md5': '3972ddf2d5f8b98699f191687258e2f9',
|
||||
'md5': '6289f9489efb969e38245f31721596fe',
|
||||
'info_dict': {
|
||||
'id': 'QChnbPYA',
|
||||
'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
|
||||
|
@ -55,7 +55,7 @@ class LeFigaroVideoEmbedIE(InfoExtractor):
|
|||
},
|
||||
}, {
|
||||
'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
|
||||
'md5': '3ac0a0769546ee6be41ab52caea5d9a9',
|
||||
'md5': 'f6df814cae53e85937621599d2967520',
|
||||
'info_dict': {
|
||||
'id': 'QJzqoNbf',
|
||||
'title': 'La philosophe Nathalie Sarthou-Lajus est l’invitée du Figaro Live',
|
||||
|
@ -73,7 +73,8 @@ def _real_extract(self, url):
|
|||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']['playerData']
|
||||
player_data = self._search_nextjs_data(
|
||||
webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']
|
||||
|
||||
return self.url_result(
|
||||
f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),
|
||||
|
|
|
@ -3,16 +3,15 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
srt_subtitles_timecode,
|
||||
strip_or_none,
|
||||
mimetype2ext,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -83,15 +82,29 @@ def _get_video_id(self, video_data, course_slug, video_slug):
|
|||
|
||||
|
||||
class LinkedInIE(LinkedInBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
|
||||
'info_dict': {
|
||||
'id': '6850898786781339649',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing',
|
||||
'description': 'md5:be125430bab1c574f16aeb186a4d5b19',
|
||||
'creator': 'Mishal K.'
|
||||
'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing #nowhiring #sendinblue…',
|
||||
'description': 'md5:2998a31f6f479376dd62831f53a80f71',
|
||||
'uploader': 'Mishal K.',
|
||||
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
|
||||
'like_count': int
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7',
|
||||
'info_dict': {
|
||||
'id': '7151241570371948544',
|
||||
'ext': 'mp4',
|
||||
'title': 'MathWorks on LinkedIn: What Is MathWorks Cloud Center?',
|
||||
'description': 'md5:95f9d4eeb6337882fb47eefe13d7a40c',
|
||||
'uploader': 'MathWorks',
|
||||
'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$',
|
||||
'like_count': int,
|
||||
'subtitles': 'mincount:1'
|
||||
},
|
||||
}]
|
||||
|
||||
|
@ -99,26 +112,30 @@ def _real_extract(self, url):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_extract_title(webpage)
|
||||
description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
|
||||
like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
|
||||
creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
|
||||
|
||||
sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id)
|
||||
video_attrs = extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))
|
||||
sources = self._parse_json(video_attrs['data-sources'], video_id)
|
||||
formats = [{
|
||||
'url': source['src'],
|
||||
'ext': mimetype2ext(source.get('type')),
|
||||
'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
|
||||
} for source in sources]
|
||||
subtitles = {'en': [{
|
||||
'url': video_attrs['data-captions-url'],
|
||||
'ext': 'vtt',
|
||||
}]} if url_or_none(video_attrs.get('data-captions-url')) else {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'like_count': like_count,
|
||||
'creator': creator,
|
||||
'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
|
||||
'like_count': int_or_none(self._search_regex(
|
||||
r'\bdata-num-reactions="(\d+)"', webpage, 'reactions', default=None)),
|
||||
'uploader': traverse_obj(
|
||||
self._yield_json_ld(webpage, video_id),
|
||||
(lambda _, v: v['@type'] == 'SocialMediaPosting', 'author', 'name', {str}), get_all=False),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'description': description,
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
|
|
282
yt_dlp/extractor/lsm.py
Normal file
282
yt_dlp/extractor/lsm.py
Normal file
|
@ -0,0 +1,282 @@
|
|||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LSMLREmbedIE(InfoExtractor):
    """Extractor for the radio embed / player pages matched by ``_VALID_URL``."""

    _VALID_URL = r'''(?x)
        https?://(?:
            (?:latvijasradio|lr1|lr2|klasika|lr4|naba|radioteatris)\.lsm|
            pieci
        )\.lv/[^/?#]+/(?:
            pleijeris|embed
        )/?\?(?:[^#]+&)?(?:show|id)=(?P<id>\d+)'''
    _TESTS = [{
        'url': 'https://latvijasradio.lsm.lv/lv/embed/?theme=black&size=16x9&showCaptions=0&id=183522',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg',
        }
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9',
        'info_dict': {
            'id': '1270',
        },
        'playlist_count': 3,
        'playlist': [{
            'md5': '2e61b6eceff00d14d57fdbbe6ab24cac',
            'info_dict': {
                'id': 'a297397',
                'ext': 'mp3',
                'title': 'Eriks Emanuels Šmits "Pilāta evaņģēlijs". 1. daļa',
                'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f131ae81e3c.jpg',
                'duration': 3300,
            },
        }],
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1269&theme=white&size=16x9',
        'md5': '24810d4a961da2295d9860afdcaf4f5a',
        'info_dict': {
            'id': 'a230690',
            'ext': 'mp3',
            'title': 'Jens Ahlboms "Spārni". Radioizrāde ar Mārtiņa Freimaņa mūziku',
            'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg',
            'duration': 1788,
        }
    }, {
        'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9',
        'info_dict': {
            'id': '166557',
        },
        'playlist_count': 2,
        'playlist': [{
            'md5': '6a8b0927572f443f09c6e50a3ad65f2d',
            'info_dict': {
                'id': 'a303104',
                'ext': 'mp3',
                'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
                'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits',
                'duration': 3222,
            },
        }, {
            'md5': '5d5e191e718b7644e5118b7b4e093a6d',
            'info_dict': {
                'id': 'v303104',
                'ext': 'mp4',
                'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/c/5/gallery_a83ad2c2.jpg',
                'title': 'Krustpunktā Lielā intervija: Valsts prezidents Egils Levits - Video Version',
                'duration': 3222,
            },
        }],
    }, {
        'url': 'https://lr1.lsm.lv/lv/embed/?id=183522&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr2.lsm.lv/lv/embed/?id=182126&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://klasika.lsm.lv/lv/embed/?id=110806&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr4.lsm.lv/lv/embed/?id=184282&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://pieci.lv/lv/embed/?id=168896&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://naba.lsm.lv/lv/embed/?id=182901&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://radioteatris.lsm.lv/lv/embed/?id=176439&show=0&theme=white&size=16x9',
        'only_matching': True,
    }, {
        'url': 'https://lr1.lsm.lv/lv/pleijeris/?embed=0&id=48205&time=00%3A00&idx=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return one info dict, or a playlist when the player lists several media items.

        The page calls ``LR.audio.Player(...)`` with two JS objects: player
        options (optional; only ``poster`` is read) and the media lists.
        """
        # Take the first of `show` / `id` that is a non-zero integer
        video_id = traverse_obj(parse_qs(url), (
            ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False)
        webpage = self._download_webpage(url, video_id)

        player_data, media_data = self._search_regex(
            r'LR\.audio\.Player\s*\([^{]*(?P<player>\{.*?\}),(?P<media>\{.*\})\);',
            webpage, 'player json', group=('player', 'media'))

        player_json = self._parse_json(
            player_data, video_id, transform_source=js_to_json, fatal=False) or {}
        media_json = self._parse_json(media_data, video_id, transform_source=js_to_json)

        def has_id(_, candidate):
            return candidate['id']

        entries = []
        for item in traverse_obj(media_json, (('audio', 'video'), has_id)):
            formats = []
            for media_url in traverse_obj(item, ('sources', ..., 'file', {url_or_none})):
                if determine_ext(media_url) != 'm3u8':
                    formats.append({'url': media_url})
                    continue
                formats.extend(self._extract_m3u8_formats(media_url, video_id, fatal=False))

            item_id = item['id']
            title = item.get('title')
            if item_id.startswith('v') and not title:
                # Untitled video items borrow the title of the audio item whose
                # id matches once the first character is dropped

                def is_audio_counterpart(_, audio, wanted=item_id[1:]):
                    return audio['id'][1:] == wanted

                def as_video_title(audio_title):
                    return audio_title and f'{audio_title} - Video Version'

                title = traverse_obj(
                    media_json,
                    ('audio', is_audio_counterpart, 'title', {as_video_title}),
                    get_all=False)

            entries.append({
                'formats': formats,
                'thumbnail': urljoin(url, player_json.get('poster')),
                'id': item_id,
                'title': title,
                'duration': traverse_obj(item, ('duration', {int_or_none})),
            })

        if len(entries) == 1:
            return entries[0]

        return self.playlist_result(entries, video_id)
|
||||
|
||||
|
||||
class LSMLTVEmbedIE(InfoExtractor):
    """Extractor for ``ltv.lsm.lv/embed?c=...`` pages; delegates to the embedded provider."""

    _VALID_URL = r'https?://ltv\.lsm\.lv/embed\?(?:[^#]+&)?c=(?P<id>[^#&]+)'
    _TESTS = [{
        'url': 'https://ltv.lsm.lv/embed?c=eyJpdiI6IjQzbHVUeHAyaDJiamFjcjdSUUFKdnc9PSIsInZhbHVlIjoiMHl3SnJNRmd2TmFIdnZwOGtGUUpzODFzUEZ4SVVsN2xoRjliSW9vckUyMWZIWG8vbWVzaFFkY0lhNmRjbjRpaCIsIm1hYyI6ImMzNjdhMzFhNTFhZmY1ZmE0NWI5YmFjZGI1YmJiNGEyNjgzNDM4MjUzMWEwM2FmMDMyZDMwYWM1MDFjZmM5MGIiLCJ0YWciOiIifQ==',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700589151,
            'duration': 1442,
            'upload_date': '20231121',
            'title': 'D23-6000-105_cetstud',
            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
        }
    }, {
        'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
        'md5': 'a1711e190fe680fdb68fd8413b378e87',
        'info_dict': {
            'id': 'wUnFArIPDSY',
            'ext': 'mp4',
            'uploader': 'LTV_16plus',
            'release_date': '20220514',
            'channel_url': 'https://www.youtube.com/channel/UCNMrnafwXD2XKeeQOyfkFCw',
            'view_count': int,
            'availability': 'public',
            'thumbnail': 'https://i.ytimg.com/vi/wUnFArIPDSY/maxresdefault.jpg',
            'release_timestamp': 1652544074,
            'title': 'EIROVĪZIJA SALĀTOS',
            'live_status': 'was_live',
            'uploader_id': '@LTV16plus',
            'comment_count': int,
            'channel_id': 'UCNMrnafwXD2XKeeQOyfkFCw',
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'duration': 5269,
            'upload_date': '20220514',
            'age_limit': 0,
            'channel': 'LTV_16plus',
            'playable_in_embed': True,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@LTV16plus',
            'like_count': int,
            'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
        }
    }]

    def _real_extract(self, url):
        """Read ``window.ltvEmbedPayload`` and hand off to the extractor named by its source.

        Raises ExtractorError when the embed type is unsupported, or when the
        payload lacks a usable embed URL / id for a supported type.
        """
        video_id = urllib.parse.unquote(self._match_id(url))
        webpage = self._download_webpage(url, video_id)
        data = self._search_json(
            r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
        embed_type = traverse_obj(data, ('source', 'name', {str}))

        if embed_type == 'telia':
            ie_key = 'CloudyCDN'
            embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
        elif embed_type == 'youtube':
            ie_key = 'Youtube'
            # For YouTube the payload carries an id string, which is passed on as the URL
            embed_url = traverse_obj(data, ('source', 'id', {str}))
        else:
            raise ExtractorError(f'Unsupported embed type {embed_type!r}')

        # Fail here with a clear message rather than passing None on to url_result
        if not embed_url:
            raise ExtractorError(f'Unable to extract embed URL for embed type {embed_type!r}')

        return self.url_result(
            embed_url, ie_key, video_id, **traverse_obj(data, {
                'title': ('parentInfo', 'title'),
                'duration': ('parentInfo', 'duration', {int_or_none}),
                'thumbnail': ('source', 'poster', {url_or_none}),
            }))
|
||||
|
||||
|
||||
class LSMReplayIE(InfoExtractor):
    """Extractor for ``replay.lsm.lv`` recording / article pages matched by ``_VALID_URL``."""

    _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'md5': '64f72a360ca530d5ed89c77646c9eee5',
        'info_dict': {
            'id': '46k_d23-6000-105',
            'ext': 'mp4',
            'timestamp': 1700586300,
            'description': 'md5:0f1b14798cc39e1ae578bd0eb268f759',
            'duration': 1442,
            'upload_date': '20231121',
            'title': '4. studija. Zolitūdes traģēdija un Inčupes stacija',
            'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
        }
    }, {
        'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
        'md5': '719b33875cd1429846eeeaeec6df2830',
        'info_dict': {
            'id': 'a342781',
            'ext': 'mp3',
            'duration': 1823,
            'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
            'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
            'upload_date': '20231102',
            'timestamp': 1698921060,
            'description': 'md5:7bac3b2dd41e44325032943251c357b1',
        }
    }, {
        'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
        'only_matching': True,
    }]

    def _fix_nuxt_data(self, webpage):
        """Replace each ``Object.create(null[,{...}])`` in *webpage* with its object literal, or ``null``."""
        def unwrap(match):
            # Group 1 is the optional `{...}` second argument
            return match.group(1) or 'null'

        return re.sub(r'Object\.create\(null(?:,(\{.+\}))?\)', unwrap, webpage)

    def _real_extract(self, url):
        """Return a ``url_transparent`` result built from the page's ``__REPLAY__`` Nuxt data."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        patched_webpage = self._fix_nuxt_data(webpage)
        data = self._search_nuxt_data(patched_webpage, video_id, context_name='__REPLAY__')

        info = {
            '_type': 'url_transparent',
            'id': video_id,
        }
        info.update(traverse_obj(data, {
            'url': ('playback', 'service', 'url', {url_or_none}),
            'title': ('mediaItem', 'title'),
            'description': ('mediaItem', ('lead', 'body')),
            'duration': ('mediaItem', 'duration', {int_or_none}),
            'timestamp': ('mediaItem', 'aired_at', {parse_iso8601}),
            'thumbnail': ('mediaItem', 'largeThumbnail', {url_or_none}),
        }, get_all=False))
        return info
|
|
@ -28,12 +28,24 @@ class MagellanTVIE(InfoExtractor):
|
|||
'tags': ['Investigation', 'True Crime', 'Justice', 'Europe'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.magellantv.com/watch/celebration-nation',
|
||||
'info_dict': {
|
||||
'id': 'celebration-nation',
|
||||
'ext': 'mp4',
|
||||
'tags': ['Art & Culture', 'Human Interest', 'Anthropology', 'China', 'History'],
|
||||
'duration': 2640.0,
|
||||
'title': 'Ancestors',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['reactContext']['video']['detail']
|
||||
data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', 'reactContext',
|
||||
(('video', 'detail'), ('series', 'currentEpisode')), {dict}), get_all=False)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(data['jwpVideoUrl'], video_id)
|
||||
|
||||
return {
|
||||
|
|
62
yt_dlp/extractor/magentamusik.py
Normal file
62
yt_dlp/extractor/magentamusik.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MagentaMusikIE(InfoExtractor):
    """Extractor for ``magentamusik.de`` pages matched by ``_VALID_URL``."""

    _VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
        'md5': 'd82dd4748f55fc91957094546aaf8584',
        'info_dict': {
            'id': '9208205928595409235',
            'display_id': 'marty-friedman-woa-2023-9208205928595409235',
            'ext': 'mp4',
            'title': 'Marty Friedman: W:O:A 2023',
            'alt_title': 'Konzert vom: 05.08.2023 13:00',
            'duration': 2760,
            'categories': ['Musikkonzert'],
            'release_year': 2023,
            'location': 'Deutschland',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the page at *url*.

        The page's player config gives an asset id; the asset details give the
        video id; the player endpoint for that video id gives the SMIL URL and
        the metadata.

        Raises ExtractorError when the page has no player config (expected),
        or when the video id or the SMIL URL cannot be found in the responses.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        player_config = self._search_json(
            r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
        if not player_config:
            raise ExtractorError('No video found', expected=True)

        asset_id = player_config['assetId']
        asset_details = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{asset_id}',
            display_id, note='Downloading asset details')

        video_id = traverse_obj(
            asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
        if not video_id:
            raise ExtractorError('Unable to extract video id')

        vod_data = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)
        smil_url = traverse_obj(
            vod_data, ('content', 'feature', 'representations', ...,
                       'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)
        # Same handling as the video id: fail clearly instead of passing None
        # on to the SMIL downloader
        if not smil_url:
            raise ExtractorError('Unable to extract SMIL URL')

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': self._extract_smil_formats(smil_url, video_id),
            **traverse_obj(vod_data, ('content', 'feature', 'metadata', {
                'title': 'title',
                'alt_title': 'originalTitle',
                'description': 'longDescription',
                'duration': ('runtimeInSeconds', {int_or_none}),
                'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
                'release_year': ('yearOfProduction', {int_or_none}),
                'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
            })),
        }
|
|
@ -1,58 +0,0 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class MagentaMusik360IE(InfoExtractor):
    """Extractor for magenta-musik-360.de concert and festival pages."""

    _VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
    _TESTS = [{
        'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
        'md5': '65b6f060b40d90276ec6fb9b992c1216',
        'info_dict': {
            'id': '9208205928595185932',
            'ext': 'm3u8',
            'title': 'WITHIN TEMPTATION',
            'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
        },
    }, {
        'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
        'md5': '81010d27d7cab3f7da0b0f681b983b7e',
        'info_dict': {
            'id': '9208205928595231363',
            'ext': 'm3u8',
            'title': 'Body Count feat. Ice-T',
            'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
        },
    }]

    def _real_extract(self, url):
        """Resolve the asset id, query the player API and return the stream info."""
        video_id = self._match_id(url)
        # Festival URLs carry no numeric id in the path. Per the original
        # author's note, _match_id stringifies the missing group, and "None"
        # is never a valid id for this site, so it serves as the marker for
        # "look the asset id up in the page markup instead".
        if video_id == "None":
            webpage = self._download_webpage(url, video_id)
            video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')

        player_data = self._download_json(
            "https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id,
            video_id)
        feature = player_data['content']['feature']
        # Only the first representation / content package is consulted.
        xml_url = feature['representations'][0]['contentPackages'][0]['media']['href']

        metadata = feature.get('metadata')
        title = description = duration = None
        thumbnails = []
        if metadata:
            title = metadata.get('title')
            description = metadata.get('fullDescription')
            duration = metadata.get('runtimeInSeconds')
            thumbnails = [
                {'url': metadata[img_key].get('href')}
                for img_key in ('teaserImageWide', 'smallCoverImage')
                if img_key in metadata
            ]

        # The media URL is the `src` attribute three levels into the manifest.
        manifest = self._download_xml(xml_url, video_id)
        final_url = manifest[0][0][0].attrib['src']

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': final_url,
            'duration': duration,
            'thumbnails': thumbnails,
        }
|
|
@ -8,7 +8,8 @@
|
|||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
|
@ -16,7 +17,7 @@ class MedalTVIE(InfoExtractor):
|
|||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
||||
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
||||
'md5': '03e4911fdcf7fce563090705c2e79267',
|
||||
'info_dict': {
|
||||
'id': 'jTBFnLKdLy15K',
|
||||
'ext': 'mp4',
|
||||
|
@ -33,8 +34,8 @@ class MedalTVIE(InfoExtractor):
|
|||
'duration': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH',
|
||||
'md5': '3d19d426fe0b2d91c26e412684e66a06',
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH',
|
||||
'md5': 'fc7a3e4552ae8993c1c4006db46be447',
|
||||
'info_dict': {
|
||||
'id': '2mA60jWAGQCBH',
|
||||
'ext': 'mp4',
|
||||
|
@ -52,7 +53,7 @@ class MedalTVIE(InfoExtractor):
|
|||
'duration': 23,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA',
|
||||
'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA',
|
||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||
'info_dict': {
|
||||
'id': '2um24TWdty0NA',
|
||||
|
@ -81,7 +82,7 @@ class MedalTVIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
webpage = self._download_webpage(update_url_query(url, {'mobilebypass': 'true'}), video_id)
|
||||
|
||||
hydration_data = self._search_json(
|
||||
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||
|
|
|
@ -355,11 +355,11 @@ class MLBArticleIE(InfoExtractor):
|
|||
'info_dict': {
|
||||
'id': '36db7394-343c-4ea3-b8ca-ead2e61bca9a',
|
||||
'title': 'Machado\'s grab draws hilarious irate reaction',
|
||||
'modified_timestamp': 1650130737,
|
||||
'modified_timestamp': 1675888370,
|
||||
'description': 'md5:a19d4eb0487b2cb304e9a176f6b67676',
|
||||
'modified_date': '20220416',
|
||||
'modified_date': '20230208',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -367,15 +367,13 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, display_id)
|
||||
apollo_cache_json = self._search_json(r'window\.initState\s*=', webpage, 'window.initState', display_id)['apolloCache']
|
||||
|
||||
content_data_id = traverse_obj(
|
||||
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getForgeContent'), 'id'), get_all=False)
|
||||
|
||||
content_real_info = apollo_cache_json[content_data_id]
|
||||
content_real_info = traverse_obj(
|
||||
apollo_cache_json, ('ROOT_QUERY', lambda k, _: k.startswith('getArticle')), get_all=False)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
traverse_obj(content_real_info, ('parts', lambda _, v: v['typename'] == 'Video', 'id')),
|
||||
getter=lambda x: f'https://www.mlb.com/video/{apollo_cache_json[x]["slug"]}',
|
||||
ie=MLBVideoIE, playlist_id=content_real_info.get('_translationId'),
|
||||
traverse_obj(content_real_info, ('parts', lambda _, v: v['__typename'] == 'Video' or v['type'] == 'video')),
|
||||
getter=lambda x: f'https://www.mlb.com/video/{x["slug"]}',
|
||||
ie=MLBVideoIE, playlist_id=content_real_info.get('translationId'),
|
||||
title=self._html_search_meta('og:title', webpage),
|
||||
description=content_real_info.get('summary'),
|
||||
modified_timestamp=parse_iso8601(content_real_info.get('lastUpdatedDate')))
|
||||
|
|
|
@ -177,6 +177,7 @@ def _real_extract(self, url):
|
|||
|
||||
|
||||
class MotherlessPaginatedIE(InfoExtractor):
|
||||
_EXTRA_QUERY = {}
|
||||
_PAGE_SIZE = 60
|
||||
|
||||
def _correct_path(self, url, item_id):
|
||||
|
@ -199,7 +200,7 @@ def _real_extract(self, url):
|
|||
def get_page(idx):
|
||||
page = idx + 1
|
||||
current_page = webpage if not idx else self._download_webpage(
|
||||
real_url, item_id, note=f'Downloading page {page}', query={'page': page})
|
||||
real_url, item_id, note=f'Downloading page {page}', query={'page': page, **self._EXTRA_QUERY})
|
||||
yield from self._extract_entries(current_page, real_url)
|
||||
|
||||
return self.playlist_result(
|
||||
|
@ -213,7 +214,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE):
|
|||
'url': 'http://motherless.com/gv/movie_scenes',
|
||||
'info_dict': {
|
||||
'id': 'movie_scenes',
|
||||
'title': 'Movie Scenes',
|
||||
'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully',
|
||||
},
|
||||
'playlist_mincount': 540,
|
||||
}, {
|
||||
|
@ -244,7 +245,7 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
|
|||
'id': '338999F',
|
||||
'title': 'Random',
|
||||
},
|
||||
'playlist_mincount': 190,
|
||||
'playlist_mincount': 171,
|
||||
}, {
|
||||
'url': 'https://motherless.com/GVABD6213',
|
||||
'info_dict': {
|
||||
|
@ -270,3 +271,27 @@ class MotherlessGalleryIE(MotherlessPaginatedIE):
|
|||
|
||||
def _correct_path(self, url, item_id):
|
||||
return urllib.parse.urljoin(url, f'/GV{item_id}')
|
||||
|
||||
|
||||
class MotherlessUploaderIE(MotherlessPaginatedIE):
    """Paginated extractor for a user's uploads page (``/u/<name>``)."""

    _VALID_URL = r'https?://(?:www\.)?motherless\.com/u/(?P<id>\w+)/?(?:$|[?#])'
    # Extra query parameters for this extractor's page requests; the same
    # `t=v` value is also appended to the URL built by _correct_path.
    _EXTRA_QUERY = {'t': 'v'}
    _TESTS = [{
        'url': 'https://motherless.com/u/Mrgo4hrs2023',
        'info_dict': {
            'id': 'Mrgo4hrs2023',
            'title': "Mrgo4hrs2023's Uploads - Videos",
        },
        'playlist_mincount': 32,
    }, {
        'url': 'https://motherless.com/u/Happy_couple?t=v',
        'info_dict': {
            'id': 'Happy_couple',
            'title': "Happy_couple's Uploads - Videos",
        },
        'playlist_mincount': 8,
    }]

    def _correct_path(self, url, item_id):
        """Return the uploads URL for `item_id`, resolved against `url`, with `t=v` set."""
        uploads_path = f'/u/{item_id}?t=v'
        return urllib.parse.urljoin(url, uploads_path)
|
||||
|
|
171
yt_dlp/extractor/mx3.py
Normal file
171
yt_dlp/extractor/mx3.py
Normal file
|
@ -0,0 +1,171 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Mx3BaseIE(InfoExtractor):
    """Shared logic for the mx3.ch family of sites.

    Subclasses must define `_DOMAIN` (the host name) and derive `_VALID_URL`
    from `_VALID_URL_TMPL`.
    """

    _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'
    # Candidate formats; 'url' is the path suffix below /tracks/<id>/ and is
    # replaced by the full URL once the candidate is confirmed to exist.
    _FORMATS = [{
        'url': 'player_asset',
        'format_id': 'default',
        'quality': 0,
    }, {
        'url': 'player_asset?quality=hd',
        'format_id': 'hd',
        'quality': 1,
    }, {
        'url': 'download',
        'format_id': 'download',
        'quality': 2,
    }, {
        'url': 'player_asset?quality=source',
        'format_id': 'source',
        'quality': 2,
    }]

    def _extract_formats(self, track_id):
        """Probe every candidate in `_FORMATS` with a HEAD request.

        Only candidates answering with HTTP 200 are returned; extension and
        file size are taken from the response.
        """
        available = []
        for candidate in self._FORMATS:
            format_url = f'https://{self._DOMAIN}/tracks/{track_id}/{candidate["url"]}'
            urlh = self._request_webpage(
                HEADRequest(format_url), track_id, fatal=False, expected_status=404,
                note=f'Checking for format {candidate["format_id"]}')
            if not urlh or urlh.status != 200:
                continue
            available.append(dict(
                candidate,
                url=format_url,
                ext=urlhandle_detect_ext(urlh),
                filesize=int_or_none(urlh.headers.get('Content-Length')),
            ))
        return available

    def _real_extract(self, url):
        """Combine the track's JSON metadata with fields scraped from its page."""
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)
        more_info = get_element_by_class('single-more-info', webpage)
        data = self._download_json(f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)

        def get_info_field(name):
            # Reads the <dd> value that follows the <dt> labelled `name`
            # inside the "single-more-info" element.
            return self._html_search_regex(
                rf'<dt[^>]*>\s*{name}\s*</dt>\s*<dd[^>]*>(.*?)</dd>',
                more_info, name, default=None, flags=re.DOTALL)

        formats = self._extract_formats(track_id)
        genre = self._html_search_regex(
            r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>', webpage, 'genre', default=None)
        release_year = int_or_none(get_info_field('Year of creation'))
        description = get_info_field('Description')
        # NOTE(review): `list` is passed as a second callable, presumably so
        # that an absent 'Tag' field yields [] - confirm against try_call.
        tags = try_call(lambda: get_info_field('Tag').split(', '), list)

        return {
            'id': track_id,
            'formats': formats,
            'genre': genre,
            'release_year': release_year,
            'description': description,
            'tags': tags,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'artist': (('performer_name', 'artist'), {str}),
                'album_artist': ('artist', {str}),
                'composer': ('composer_name', {str}),
                'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
            }, get_all=False),
        }
|
||||
|
||||
|
||||
class Mx3IE(Mx3BaseIE):
    """Track pages on mx3.ch."""

    _DOMAIN = 'mx3.ch'
    # The domain is escaped so its dots match literally in the URL pattern.
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://mx3.ch/t/1Cru',
        'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
        'info_dict': {
            'id': '1Cru',
            'ext': 'wav',
            'artist': 'Godina',
            'album_artist': 'Tortue Tortue',
            'composer': 'Olivier Godinat',
            'genre': 'Rock',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
            'title': "S'envoler",
            'release_year': 2021,
            'tags': [],
        },
    }, {
        'url': 'https://mx3.ch/t/1LIY',
        'md5': '48293cb908342547827f963a5a2e9118',
        'info_dict': {
            'id': '1LIY',
            'ext': 'mov',
            'artist': 'Tania Kimfumu',
            'album_artist': 'The Broots',
            'composer': 'Emmanuel Diserens',
            'genre': 'Electro',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
            'title': 'The Broots-Larytta remix "Begging For Help"',
            'release_year': 2023,
            'tags': ['the broots', 'cassata records', 'larytta'],
            'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
        },
    }, {
        'url': 'https://mx3.ch/t/1C6E',
        'md5': '1afcd578493ddb8e5008e94bb6d97e25',
        'info_dict': {
            'id': '1C6E',
            'ext': 'wav',
            'artist': 'Alien Bubblegum',
            'album_artist': 'Alien Bubblegum',
            'composer': 'Alien Bubblegum',
            'genre': 'Punk',
            'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
            'title': 'Wide Awake',
            'release_year': 2021,
            'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
        },
    }]
|
||||
|
||||
|
||||
class Mx3NeoIE(Mx3BaseIE):
    """Track pages on neo.mx3.ch."""

    _DOMAIN = 'neo.mx3.ch'
    # The domain is escaped so its dots match literally in the URL pattern.
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://neo.mx3.ch/t/1hpd',
        'md5': '6d9986bbae5cac3296ec8813bf965eb2',
        'info_dict': {
            'id': '1hpd',
            'ext': 'wav',
            'artist': 'Baptiste Lopez',
            'album_artist': 'Kammerorchester Basel',
            'composer': 'Jannik Giger',
            'genre': 'Composition, Orchestra',
            'title': 'Troisième œil. Für Kammerorchester (2023)',
            'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
            'release_year': 2023,
            'tags': [],
        },
    }]
|
||||
|
||||
|
||||
class Mx3VolksmusikIE(Mx3BaseIE):
    """Track pages on volksmusik.mx3.ch."""

    _DOMAIN = 'volksmusik.mx3.ch'
    # The domain is escaped so its dots match literally in the URL pattern.
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)
    _TESTS = [{
        'url': 'https://volksmusik.mx3.ch/t/Zx',
        'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
        'info_dict': {
            'id': 'Zx',
            'ext': 'mp3',
            'artist': 'Ländlerkapelle GrischArt',
            'album_artist': 'Ländlerkapelle GrischArt',
            'composer': 'Urs Glauser',
            'genre': 'Instrumental, Graubünden',
            'title': 'Chämilouf',
            'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
            'release_year': 2012,
            'tags': [],
        },
    }]
|
|
@ -1,20 +1,25 @@
|
|||
import base64
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
import time
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
|
@ -110,6 +115,18 @@ def get_subs(caption_url):
|
|||
**self.process_subtitles(video_data, get_subs),
|
||||
}
|
||||
|
||||
def _call_api(self, path, video_id):
    """Call a signed endpoint of the Naver "now_web_api" and return its `result`.

    The request is signed with two query parameters: `msgpad`, the current
    time in milliseconds, and `md`, the base64-encoded HMAC-SHA1 of the first
    255 characters of the endpoint URL followed by `msgpad`.
    """
    api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
    key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
    msgpad = int(time.time() * 1000)

    # Only the first 255 characters of the URL take part in the signature.
    signed_message = f'{api_endpoint[:255]}{msgpad}'.encode()
    digest = hmac.new(key, signed_message, hashlib.sha1).digest()
    md = base64.b64encode(digest).decode()

    response = self._download_json(
        api_endpoint, video_id=video_id, headers=self.geo_verification_headers(),
        query={'msgpad': msgpad, 'md': md})
    return response['result']
|
||||
|
||||
|
||||
class NaverIE(NaverBaseIE):
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
|
||||
|
@ -125,21 +142,32 @@ class NaverIE(NaverBaseIE):
|
|||
'upload_date': '20130903',
|
||||
'uploader': '메가스터디, 합격불변의 법칙',
|
||||
'uploader_id': 'megastudy',
|
||||
'uploader_url': 'https://tv.naver.com/megastudy',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 2118,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://tv.naver.com/v/395837',
|
||||
'md5': '8a38e35354d26a17f73f4e90094febd3',
|
||||
'md5': '7791205fa89dbed2f5e3eb16d287ff05',
|
||||
'info_dict': {
|
||||
'id': '395837',
|
||||
'ext': 'mp4',
|
||||
'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
|
||||
'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
|
||||
'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
|
||||
'timestamp': 1432030253,
|
||||
'upload_date': '20150519',
|
||||
'uploader': '4가지쇼 시즌2',
|
||||
'uploader_id': 'wrappinguser29',
|
||||
'uploader': '4가지쇼',
|
||||
'uploader_id': '4show',
|
||||
'uploader_url': 'https://tv.naver.com/4show',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'duration': 277,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'skip': 'Georestricted',
|
||||
}, {
|
||||
'url': 'http://tvcast.naver.com/v/81652',
|
||||
'only_matching': True,
|
||||
|
@ -147,56 +175,63 @@ class NaverIE(NaverBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
content = self._download_json(
|
||||
'https://tv.naver.com/api/json/v/' + video_id,
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
player_info_json = content.get('playerInfoJson') or {}
|
||||
current_clip = player_info_json.get('currentClip') or {}
|
||||
data = self._call_api(f'/clips/{video_id}/play-info', video_id)
|
||||
|
||||
vid = current_clip.get('videoId')
|
||||
in_key = current_clip.get('inKey')
|
||||
vid = traverse_obj(data, ('clip', 'videoId', {str}))
|
||||
in_key = traverse_obj(data, ('play', 'inKey', {str}))
|
||||
|
||||
if not vid or not in_key:
|
||||
player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
|
||||
if player_auth == 'notCountry':
|
||||
self.raise_geo_restricted(countries=['KR'])
|
||||
elif player_auth == 'notLogin':
|
||||
self.raise_login_required()
|
||||
raise ExtractorError('couldn\'t extract vid and key')
|
||||
raise ExtractorError('Unable to extract video info')
|
||||
|
||||
info = self._extract_video_info(video_id, vid, in_key)
|
||||
info.update({
|
||||
'description': clean_html(current_clip.get('description')),
|
||||
'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
|
||||
'duration': parse_duration(current_clip.get('displayPlayTime')),
|
||||
'like_count': int_or_none(current_clip.get('recommendPoint')),
|
||||
'age_limit': 19 if current_clip.get('adult') else None,
|
||||
})
|
||||
info.update(traverse_obj(data, ('clip', {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'timestamp': ('firstExposureDatetime', {parse_iso8601}),
|
||||
'duration': ('playTime', {int_or_none}),
|
||||
'like_count': ('likeItCount', {int_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'uploader': 'channelName',
|
||||
'uploader_id': 'channelId',
|
||||
'uploader_url': ('channelUrl', {url_or_none}),
|
||||
'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
|
||||
})))
|
||||
return info
|
||||
|
||||
|
||||
class NaverLiveIE(InfoExtractor):
|
||||
class NaverLiveIE(NaverBaseIE):
|
||||
IE_NAME = 'Naver:live'
|
||||
_VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
|
||||
_GEO_BYPASS = False
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.naver.com/l/52010',
|
||||
'url': 'https://tv.naver.com/l/127062',
|
||||
'info_dict': {
|
||||
'id': '52010',
|
||||
'id': '127062',
|
||||
'ext': 'mp4',
|
||||
'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
|
||||
'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
|
||||
'channel_id': 'NTV-ytnnews24-0',
|
||||
'start_time': 1597026780000,
|
||||
'live_status': 'is_live',
|
||||
'channel': '뉴스는 YTN',
|
||||
'channel_id': 'ytnnews24',
|
||||
'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:f938b5956711beab6f882314ffadf4d5',
|
||||
'start_time': 1677752280,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/l/51549',
|
||||
'url': 'https://tv.naver.com/l/140535',
|
||||
'info_dict': {
|
||||
'id': '51549',
|
||||
'id': '140535',
|
||||
'ext': 'mp4',
|
||||
'title': '연합뉴스TV - 코로나19 뉴스특보',
|
||||
'description': 'md5:c655e82091bc21e413f549c0eaccc481',
|
||||
'channel_id': 'NTV-yonhapnewstv-0',
|
||||
'start_time': 1596406380000,
|
||||
'live_status': 'is_live',
|
||||
'channel': 'KBS뉴스',
|
||||
'channel_id': 'kbsnews',
|
||||
'start_time': 1696867320,
|
||||
'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
|
||||
'like_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.naver.com/l/54887',
|
||||
|
@ -205,55 +240,27 @@ class NaverLiveIE(InfoExtractor):
|
|||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
page = self._download_webpage(url, video_id, 'Downloading Page', 'Unable to download Page')
|
||||
secure_url = self._search_regex(r'sApiF:\s+(?:"|\')([^"\']+)', page, 'secureurl')
|
||||
|
||||
info = self._extract_video_info(video_id, secure_url)
|
||||
info.update({
|
||||
'description': self._og_search_description(page)
|
||||
})
|
||||
|
||||
return info
|
||||
|
||||
def _extract_video_info(self, video_id, url):
|
||||
video_data = self._download_json(url, video_id, headers=self.geo_verification_headers())
|
||||
meta = video_data.get('meta')
|
||||
status = meta.get('status')
|
||||
data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)
|
||||
|
||||
status = traverse_obj(data, ('live', 'liveStatus'))
|
||||
if status == 'CLOSED':
|
||||
raise ExtractorError('Stream is offline.', expected=True)
|
||||
elif status != 'OPENED':
|
||||
raise ExtractorError('Unknown status %s' % status)
|
||||
|
||||
title = meta.get('title')
|
||||
stream_list = video_data.get('streams')
|
||||
|
||||
if stream_list is None:
|
||||
raise ExtractorError('Could not get stream data.', expected=True)
|
||||
|
||||
formats = []
|
||||
for quality in stream_list:
|
||||
if not quality.get('url'):
|
||||
continue
|
||||
|
||||
prop = quality.get('property')
|
||||
if prop.get('abr'): # This abr doesn't mean Average audio bitrate.
|
||||
continue
|
||||
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
quality.get('url'), video_id, 'mp4',
|
||||
m3u8_id=quality.get('qualityId'), live=True
|
||||
))
|
||||
raise ExtractorError(f'Unknown status {status!r}')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'channel_id': meta.get('channelId'),
|
||||
'channel_url': meta.get('channelUrl'),
|
||||
'thumbnail': meta.get('imgUrl'),
|
||||
'start_time': meta.get('startTime'),
|
||||
'categories': [meta.get('categoryId')],
|
||||
'formats': self._extract_m3u8_formats(
|
||||
traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path')), video_id, live=True),
|
||||
**traverse_obj(data, ('live', {
|
||||
'title': 'title',
|
||||
'channel': 'channelName',
|
||||
'channel_id': 'channelId',
|
||||
'description': 'description',
|
||||
'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
|
||||
'thumbnail': ('thumbnailImageUrl', {url_or_none}),
|
||||
'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
|
||||
}), get_all=False),
|
||||
'is_live': True
|
||||
}
|
||||
|
||||
|
|
|
@ -3,15 +3,15 @@
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
OnDemandPagedList,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
|
@ -263,19 +263,16 @@ class NewgroundsUserIE(InfoExtractor):
|
|||
def _fetch_page(self, channel_id, url, page):
|
||||
page += 1
|
||||
posts_info = self._download_json(
|
||||
f'{url}/page/{page}', channel_id,
|
||||
f'{url}?page={page}', channel_id,
|
||||
note=f'Downloading page {page}', headers={
|
||||
'Accept': 'application/json, text/javascript, */*; q = 0.01',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
})
|
||||
sequence = posts_info.get('sequence', [])
|
||||
for year in sequence:
|
||||
posts = try_get(posts_info, lambda x: x['years'][str(year)]['items'])
|
||||
for post in posts:
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
for post in traverse_obj(posts_info, ('items', ..., ..., {str})):
|
||||
path, media_id = self._search_regex(
|
||||
r'<a[^>]+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>',
|
||||
post, 'url', group=(1, 2))
|
||||
yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
|
|
|
@ -1,10 +1,54 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
merge_dicts,
|
||||
parse_count,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NFBIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P<id>[^/?#&]+)'
|
||||
class NFBBaseIE(InfoExtractor):
    """Shared helpers for the nfb.ca / onf.ca extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<site>nfb|onf)\.ca'
    _GEO_COUNTRIES = ['CA']

    def _extract_ep_data(self, webpage, video_id, fatal=False):
        """Return the `episodesData` JSON array embedded in `webpage`.

        An empty list is returned when the search yields nothing falsy-free.
        """
        episodes = self._search_json(
            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal)
        return episodes or []

    def _extract_ep_info(self, data, video_id, slug=None):
        """Build the info dict for the episode whose embed URL contains `video_id`.

        `data` is the episode list from `_extract_ep_data`. The episode
        number is parsed from `slug`, or from `video_id` when no slug is given.
        """
        field_map = {
            'description': ('description', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'uploader': ('data_layer', 'episodeMaker', {str}),
            'release_year': ('data_layer', 'episodeYear', {int_or_none}),
            'episode': ('data_layer', 'episodeTitle', {str}),
            'season': ('data_layer', 'seasonTitle', {str}),
            # The season number is parsed out of the season title text.
            'season_number': ('data_layer', 'seasonTitle', {parse_count}),
            'series': ('data_layer', 'seriesTitle', {str}),
        }
        # Only the first episode entry matching the embed URL is used.
        info = traverse_obj(
            data, (lambda _, v: video_id in v['embed_url'], field_map), get_all=False)

        episode_number_re = r'[/-]e(?:pisode)?-?(\d+)(?:[/-]|$)'
        return {
            **info,
            'id': video_id,
            'title': join_nonempty('series', 'episode', from_dict=info, delim=' - '),
            'episode_number': int_or_none(self._search_regex(
                episode_number_re, slug or video_id, 'episode number', default=None)),
        }
|
||||
|
||||
|
||||
class NFBIE(NFBBaseIE):
|
||||
IE_NAME = 'nfb'
|
||||
IE_DESC = 'nfb.ca and onf.ca films and episodes'
|
||||
_VALID_URL = [
|
||||
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>film)/(?P<id>[^/?#&]+)',
|
||||
rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+/s(?:ea|ai)son\d+/episode\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'note': 'NFB film',
|
||||
'url': 'https://www.nfb.ca/film/trafficopter/',
|
||||
'info_dict': {
|
||||
'id': 'trafficopter',
|
||||
|
@ -14,29 +58,192 @@ class NFBIE(InfoExtractor):
|
|||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Barrie Howells',
|
||||
'release_year': 1972,
|
||||
'duration': 600.0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF film',
|
||||
'url': 'https://www.onf.ca/film/mal-du-siecle/',
|
||||
'info_dict': {
|
||||
'id': 'mal-du-siecle',
|
||||
'ext': 'mp4',
|
||||
'title': 'Le mal du siècle',
|
||||
'description': 'md5:1abf774d77569ebe603419f2d344102b',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Catherine Lepage',
|
||||
'release_year': 2019,
|
||||
'duration': 300.0,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with English title',
|
||||
'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/season1/episode9/',
|
||||
'info_dict': {
|
||||
'id': 'true-north-episode9-true-north-finale-making-it',
|
||||
'ext': 'mp4',
|
||||
'title': 'True North: Inside the Rise of Toronto Basketball - Finale: Making It',
|
||||
'description': 'We catch up with each player in the midst of their journey as they reflect on their road ahead.',
|
||||
'series': 'True North: Inside the Rise of Toronto Basketball',
|
||||
'release_year': 2018,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Finale: Making It',
|
||||
'episode_number': 9,
|
||||
'uploader': 'Ryan Sidhoo',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with French title',
|
||||
'url': 'https://www.onf.ca/serie/direction-nord-la-montee-du-basketball-a-toronto/saison1/episode9/',
|
||||
'info_dict': {
|
||||
'id': 'direction-nord-episode-9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Direction nord – La montée du basketball à Toronto - Finale : Réussir',
|
||||
'description': 'md5:349a57419b71432b97bf6083d92b029d',
|
||||
'series': 'Direction nord – La montée du basketball à Toronto',
|
||||
'release_year': 2018,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Finale : Réussir',
|
||||
'episode_number': 9,
|
||||
'uploader': 'Ryan Sidhoo',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with French title (needs geo-bypass)',
|
||||
'url': 'https://www.nfb.ca/series/etoile-du-nord/saison1/episode1/',
|
||||
'info_dict': {
|
||||
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||
'ext': 'mp4',
|
||||
'title': 'Étoile du Nord - L\'observation',
|
||||
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||
'series': 'Étoile du Nord',
|
||||
'release_year': 2023,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'L\'observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with English title (needs geo-bypass)',
|
||||
'url': 'https://www.onf.ca/serie/north-star/season1/episode1/',
|
||||
'info_dict': {
|
||||
'id': 'north-star-episode-1-observation',
|
||||
'ext': 'mp4',
|
||||
'title': 'North Star - Observation',
|
||||
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||
'series': 'North Star',
|
||||
'release_year': 2023,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB episode with /film/ URL and English title (needs geo-bypass)',
|
||||
'url': 'https://www.nfb.ca/film/north-star-episode-1-observation/',
|
||||
'info_dict': {
|
||||
'id': 'north-star-episode-1-observation',
|
||||
'ext': 'mp4',
|
||||
'title': 'North Star - Observation',
|
||||
'description': 'md5:c727f370839d8a817392b9e3f23655c7',
|
||||
'series': 'North Star',
|
||||
'release_year': 2023,
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'ONF episode with /film/ URL and French title (needs geo-bypass)',
|
||||
'url': 'https://www.onf.ca/film/etoile-du-nord-episode-1-lobservation/',
|
||||
'info_dict': {
|
||||
'id': 'etoile-du-nord-episode-1-lobservation',
|
||||
'ext': 'mp4',
|
||||
'title': 'Étoile du Nord - L\'observation',
|
||||
'description': 'md5:161a4617260dee3de70f509b2c9dd21b',
|
||||
'series': 'Étoile du Nord',
|
||||
'release_year': 2023,
|
||||
'season': 'Saison 1',
|
||||
'season_number': 1,
|
||||
'episode': 'L\'observation',
|
||||
'episode_number': 1,
|
||||
'uploader': 'Patrick Bossé',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'Season 2 episode w/o episode num in id, extract from json ld',
|
||||
'url': 'https://www.onf.ca/film/liste-des-choses-qui-existent-saison-2-ours',
|
||||
'info_dict': {
|
||||
'id': 'liste-des-choses-qui-existent-saison-2-ours',
|
||||
'ext': 'mp4',
|
||||
'title': 'La liste des choses qui existent - L\'ours en peluche',
|
||||
'description': 'md5:d5e8d8fc5f3a7385a9cf0f509b37e28a',
|
||||
'series': 'La liste des choses qui existent',
|
||||
'release_year': 2022,
|
||||
'season': 'Saison 2',
|
||||
'season_number': 2,
|
||||
'episode': 'L\'ours en peluche',
|
||||
'episode_number': 12,
|
||||
'uploader': 'Francis Papillon',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'note': 'NFB film /embed/player/ page',
|
||||
'url': 'https://www.nfb.ca/film/afterlife/embed/player/',
|
||||
'info_dict': {
|
||||
'id': 'afterlife',
|
||||
'ext': 'mp4',
|
||||
'title': 'Afterlife',
|
||||
'description': 'md5:84951394f594f1fb1e62d9c43242fdf5',
|
||||
'release_year': 1978,
|
||||
'duration': 420.0,
|
||||
'uploader': 'Ishu Patel',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
site, type_, slug = self._match_valid_url(url).group('site', 'type', 'id')
|
||||
# Need to construct the URL since we match /embed/player/ URLs as well
|
||||
webpage, urlh = self._download_webpage_handle(f'https://www.{site}.ca/{type_}/{slug}/', slug)
|
||||
# type_ can change from film to serie(s) after redirect; new slug may have episode number
|
||||
type_, slug = self._match_valid_url(urlh.url).group('type', 'id')
|
||||
|
||||
webpage = self._download_webpage('https://www.nfb.ca/film/%s/' % video_id, video_id)
|
||||
embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
|
||||
video_id = self._match_id(embed_url) # embed url has unique slug
|
||||
player = self._download_webpage(embed_url, video_id, 'Downloading player page')
|
||||
if 'MESSAGE_GEOBLOCKED' in player:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
|
||||
iframe = self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']player-iframe["\'][^>]*src=["\']([^"\']+)',
|
||||
webpage, 'iframe', default=None, fatal=True)
|
||||
if iframe.startswith('/'):
|
||||
iframe = f'https://www.nfb.ca{iframe}'
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
|
||||
video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
player = self._download_webpage(iframe, video_id)
|
||||
if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt['format_note'] = 'described video'
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
source = self._html_search_regex(
|
||||
r'source:\s*\'([^\']+)',
|
||||
player, 'source', default=None, fatal=True)
|
||||
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
|
||||
|
||||
return {
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': self._html_search_regex(
|
||||
r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
|
||||
|
@ -45,14 +252,49 @@ def _real_extract(self, url):
|
|||
r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
|
||||
webpage, 'description', default=None),
|
||||
'thumbnail': self._html_search_regex(
|
||||
r'poster:\s*\'([^\']+)',
|
||||
player, 'thumbnail', default=None),
|
||||
r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
|
||||
'uploader': self._html_search_regex(
|
||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
||||
webpage, 'uploader', default=None),
|
||||
r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
|
||||
'release_year': int_or_none(self._html_search_regex(
|
||||
r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
|
||||
webpage, 'release_year', default=None)),
|
||||
} if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)
|
||||
|
||||
return merge_dicts({
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
}, info, self._search_json_ld(webpage, video_id, default={}))
|
||||
|
||||
|
||||
class NFBSeriesIE(NFBBaseIE):
    """Playlist extractor for whole series pages on nfb.ca / onf.ca."""

    IE_NAME = 'nfb:series'
    IE_DESC = 'nfb.ca and onf.ca series'
    _VALID_URL = rf'{NFBBaseIE._VALID_URL_BASE}/(?P<type>series?)/(?P<id>[^/?#&]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.nfb.ca/series/true-north-inside-the-rise-of-toronto-basketball/',
        'playlist_mincount': 9,
        'info_dict': {
            'id': 'true-north-inside-the-rise-of-toronto-basketball',
        },
    }, {
        'url': 'https://www.onf.ca/serie/la-liste-des-choses-qui-existent-serie/',
        'playlist_mincount': 26,
        'info_dict': {
            'id': 'la-liste-des-choses-qui-existent-serie',
        },
    }]

    def _entries(self, episodes):
        """Yield one url_result per episode whose `embed_url` is handled by NFBIE."""

        def handled_by_nfb(_, candidate):
            return NFBIE.suitable(candidate['embed_url'])

        for item in traverse_obj(episodes, handled_by_nfb):
            match = NFBIE._match_valid_url(item['embed_url'])
            # Pre-populate the entry with metadata already present in the
            # episode list, keyed on the id captured from the embed URL.
            metadata = self._extract_ep_info([item], match.group('id'))
            yield self.url_result(match[0], NFBIE, **metadata)

    def _real_extract(self, url):
        """Download the first episode page of the series and return all episodes as a playlist."""
        url_match = self._match_valid_url(url)
        site, type_, series_id = url_match.group('site', 'type', 'id')

        # The French `/serie/` path uses `saison`; the English path uses `season`.
        if type_ == 'serie':
            season_path = 'saison'
        else:
            season_path = 'season'

        first_episode_url = f'https://www.{site}.ca/{type_}/{series_id}/{season_path}1/episode1'
        webpage = self._download_webpage(first_episode_url, series_id)
        episodes = self._extract_ep_data(webpage, series_id, fatal=True)

        return self.playlist_result(self._entries(episodes), series_id)
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
join_nonempty,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
|
@ -473,22 +474,21 @@ class NhkRadiruIE(InfoExtractor):
|
|||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
|
||||
'skip': 'Episode expired on 2023-04-16',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210',
|
||||
'skip': 'Episode expired on 2024-02-24',
|
||||
'info_dict': {
|
||||
'channel': 'NHK-FM',
|
||||
'uploader': 'NHK-FM',
|
||||
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
|
||||
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス',
|
||||
'id': '0449_01_3926210',
|
||||
'ext': 'm4a',
|
||||
'id': '0449_01_3853544',
|
||||
'series': 'ジャズ・トゥナイト',
|
||||
'uploader': 'NHK-FM',
|
||||
'channel': 'NHK-FM',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||
'timestamp': 1680969600,
|
||||
'title': 'ジャズ・トゥナイト NEWジャズ特集',
|
||||
'upload_date': '20230408',
|
||||
'release_timestamp': 1680962400,
|
||||
'release_date': '20230408',
|
||||
'was_live': True,
|
||||
'release_date': '20240217',
|
||||
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811',
|
||||
'timestamp': 1708185600,
|
||||
'release_timestamp': 1708178400,
|
||||
'upload_date': '20240217',
|
||||
},
|
||||
}, {
|
||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||
|
@ -519,7 +519,8 @@ class NhkRadiruIE(InfoExtractor):
|
|||
'series': 'らじる文庫 by ラジオ深夜便 ',
|
||||
'release_timestamp': 1481126700,
|
||||
'upload_date': '20211101',
|
||||
}
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'],
|
||||
}, {
|
||||
# news
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
||||
|
@ -539,9 +540,28 @@ class NhkRadiruIE(InfoExtractor):
|
|||
},
|
||||
}]
|
||||
|
||||
_API_URL_TMPL = None
|
||||
|
||||
def _extract_extended_description(self, episode_id, episode):
    """Fetch the long-form description for an episode from the programme-detail API.

    The request URL is built from `_API_URL_TMPL` using the two
    comma-separated parts of `aa_vinfo2` (service, area) and `aa_vinfo3`
    (date id). Returns None when no URL can be built; otherwise the
    'subtitle', 'content', 'act' and 'music' fields of the first list entry
    for the service, joined by blank lines.
    """
    vinfo2_parts = traverse_obj(
        episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')}))
    service, _, area = vinfo2_parts
    date_id = traverse_obj(episode, ('aa_vinfo3', {str}))

    def build_detail_url():
        # Fails (and is swallowed by try_call) when the template is still None.
        return self._API_URL_TMPL.format(service=service, area=area, dateid=date_id)

    detail_url = try_call(build_detail_url)
    if not detail_url:
        return None

    response = self._download_json(
        detail_url, episode_id, 'Downloading extended metadata', fatal=False)
    full_meta = traverse_obj(response, ('list', service, 0, {dict})) or {}
    return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta)
|
||||
|
||||
def _extract_episode_info(self, headline, programme_id, series_meta):
|
||||
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
||||
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
||||
description = self._extract_extended_description(episode_id, episode)
|
||||
if not description:
|
||||
self.report_warning('Failed to get extended description, falling back to summary')
|
||||
description = traverse_obj(episode, ('file_title_sub', {str}))
|
||||
|
||||
return {
|
||||
**series_meta,
|
||||
|
@ -551,14 +571,21 @@ def _extract_episode_info(self, headline, programme_id, series_meta):
|
|||
'was_live': True,
|
||||
'series': series_meta.get('title'),
|
||||
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
||||
'description': description,
|
||||
**traverse_obj(episode, {
|
||||
'title': 'file_title',
|
||||
'description': 'file_title_sub',
|
||||
'timestamp': ('open_time', {unified_timestamp}),
|
||||
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
||||
}),
|
||||
}
|
||||
|
||||
def _real_initialize(self):
    """Load the programme-detail URL template once and cache it on the class."""
    if not self._API_URL_TMPL:
        api_config = self._download_xml(
            'https://www.nhk.or.jp/radio/config/config_web.xml', None,
            'Downloading API config', fatal=False)

        def read_detail_template():
            return f'https:{api_config.find(".//url_program_detail").text}'

        # Stored on the class (not the instance) so later instances skip the
        # download; try_call leaves it as None if the config lookup fails.
        NhkRadiruIE._API_URL_TMPL = try_call(read_detail_template)
|
||||
|
||||
def _real_extract(self, url):
|
||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||
programme_id = f'{site_id}_{corner_id}'
|
||||
|
@ -665,7 +692,7 @@ def _real_extract(self, url):
|
|||
|
||||
noa_info = self._download_json(
|
||||
f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
|
||||
station, note=f'Downloading {area} station metadata')
|
||||
station, note=f'Downloading {area} station metadata', fatal=False)
|
||||
present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
|
||||
|
||||
return {
|
||||
|
|
|
@ -172,9 +172,6 @@ class NiconicoIE(InfoExtractor):
|
|||
|
||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
_COMMENT_API_ENDPOINTS = (
|
||||
'https://nvcomment.nicovideo.jp/legacy/api.json',
|
||||
'https://nmsg.nicovideo.jp/api.json',)
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
|
@ -470,93 +467,16 @@ def get_video_info(*items, get_first=True, **kwargs):
|
|||
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
|
||||
or get_video_info('duration')),
|
||||
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
|
||||
'subtitles': self.extract_subtitles(video_id, api_data),
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, api_data, session_api_data):
|
||||
comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
|
||||
user_id_str = session_api_data.get('serviceUserId')
|
||||
|
||||
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
||||
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
|
||||
|
||||
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
|
||||
new_danmaku = self._extract_new_comments(
|
||||
new_comments.get('server'), video_id,
|
||||
new_comments.get('params'), new_comments.get('threadKey'))
|
||||
|
||||
if not legacy_danmaku and not new_danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(legacy_danmaku + new_danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
|
||||
auth_data = {
|
||||
'user_id': user_id,
|
||||
'userkey': user_key,
|
||||
} if user_id and user_key else {'user_id': ''}
|
||||
|
||||
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
|
||||
|
||||
# Request Start
|
||||
post_data = [{'ping': {'content': 'rs:0'}}]
|
||||
for i, thread in enumerate(threads):
|
||||
thread_id = thread['id']
|
||||
thread_fork = thread['fork']
|
||||
# Post Start (2N)
|
||||
post_data.append({'ping': {'content': f'ps:{i * 2}'}})
|
||||
post_data.append({'thread': {
|
||||
'fork': thread_fork,
|
||||
'language': 0,
|
||||
'nicoru': 3,
|
||||
'scores': 1,
|
||||
'thread': thread_id,
|
||||
'version': '20090904',
|
||||
'with_global': 1,
|
||||
**auth_data,
|
||||
}})
|
||||
# Post Final (2N)
|
||||
post_data.append({'ping': {'content': f'pf:{i * 2}'}})
|
||||
|
||||
# Post Start (2N+1)
|
||||
post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
|
||||
post_data.append({'thread_leaves': {
|
||||
# format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments'
|
||||
# unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
|
||||
'content': '0-999999:999999,999999,nicoru:999999',
|
||||
'fork': thread_fork,
|
||||
'language': 0,
|
||||
'nicoru': 3,
|
||||
'scores': 1,
|
||||
'thread': thread_id,
|
||||
**auth_data,
|
||||
}})
|
||||
# Post Final (2N+1)
|
||||
post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
|
||||
# Request Final
|
||||
post_data.append({'ping': {'content': 'rf:0'}})
|
||||
|
||||
return self._download_json(
|
||||
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Content-Type': 'text/plain;charset=UTF-8',
|
||||
},
|
||||
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
||||
|
||||
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
|
||||
comments = self._download_json(
|
||||
f'{endpoint}/v1/threads', video_id, data=json.dumps({
|
||||
def _get_subtitles(self, video_id, api_data):
|
||||
comments_info = traverse_obj(api_data, ('comment', 'nvComment', {dict})) or {}
|
||||
danmaku = traverse_obj(self._download_json(
|
||||
f'{comments_info.get("server")}/v1/threads', video_id, data=json.dumps({
|
||||
'additionals': {},
|
||||
'params': params,
|
||||
'threadKey': thread_key,
|
||||
'params': comments_info.get('params'),
|
||||
'threadKey': comments_info.get('threadKey'),
|
||||
}).encode(), fatal=False,
|
||||
headers={
|
||||
'Referer': 'https://www.nicovideo.jp/',
|
||||
|
@ -566,8 +486,19 @@ def _extract_new_comments(self, endpoint, video_id, params, thread_key):
|
|||
'x-frontend-id': '6',
|
||||
'x-frontend-version': '0',
|
||||
},
|
||||
note='Downloading comments (new)', errnote='Failed to download comments (new)')
|
||||
return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...))
|
||||
note='Downloading comments', errnote='Failed to download comments'),
|
||||
('data', 'threads', ..., 'comments', ...))
|
||||
|
||||
if not danmaku:
|
||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||
return
|
||||
|
||||
return {
|
||||
'comments': [{
|
||||
'ext': 'json',
|
||||
'data': json.dumps(danmaku),
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
class NiconicoPlaylistBaseIE(InfoExtractor):
|
||||
|
|
225
yt_dlp/extractor/ninaprotocol.py
Normal file
225
yt_dlp/extractor/ninaprotocol.py
Normal file
|
@ -0,0 +1,225 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, mimetype2ext, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NinaProtocolIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?ninaprotocol\.com/releases/(?P<id>[^/#?]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ninaprotocol.com/releases/3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ',
|
||||
'title': 'The Spatulas - March Chant',
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
'channel': 'ppm',
|
||||
'description': 'md5:bb9f9d39d8f786449cd5d0ff7c5772db',
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'timestamp': 1701417610,
|
||||
'uploader': 'ppmrecs',
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'display_id': 'the-spatulas-march-chant',
|
||||
'upload_date': '20231201',
|
||||
'album_artist': 'Post Present Medium ',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_1',
|
||||
'title': 'March Chant In April',
|
||||
'track': 'March Chant In April',
|
||||
'ext': 'mp3',
|
||||
'duration': 152,
|
||||
'track_number': 1,
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
'uploader': 'ppmrecs',
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'timestamp': 1701417610,
|
||||
'channel': 'ppm',
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'upload_date': '20231201',
|
||||
'album_artist': 'Post Present Medium ',
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2',
|
||||
'title': 'Rescue Mission',
|
||||
'track': 'Rescue Mission',
|
||||
'ext': 'mp3',
|
||||
'duration': 212,
|
||||
'track_number': 2,
|
||||
'album_artist': 'Post Present Medium ',
|
||||
'uploader': 'ppmrecs',
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'channel': 'ppm',
|
||||
'upload_date': '20231201',
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'timestamp': 1701417610,
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3',
|
||||
'title': 'Slinger Style',
|
||||
'track': 'Slinger Style',
|
||||
'ext': 'mp3',
|
||||
'duration': 179,
|
||||
'track_number': 3,
|
||||
'timestamp': 1701417610,
|
||||
'upload_date': '20231201',
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'album_artist': 'Post Present Medium ',
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'uploader': 'ppmrecs',
|
||||
'channel': 'ppm',
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4',
|
||||
'title': 'Psychic Signal',
|
||||
'track': 'Psychic Signal',
|
||||
'ext': 'mp3',
|
||||
'duration': 220,
|
||||
'track_number': 4,
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'upload_date': '20231201',
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'timestamp': 1701417610,
|
||||
'album_artist': 'Post Present Medium ',
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'channel': 'ppm',
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
'uploader': 'ppmrecs',
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5',
|
||||
'title': 'Curvy Color',
|
||||
'track': 'Curvy Color',
|
||||
'ext': 'mp3',
|
||||
'duration': 148,
|
||||
'track_number': 5,
|
||||
'timestamp': 1701417610,
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'album_artist': 'Post Present Medium ',
|
||||
'channel': 'ppm',
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'uploader': 'ppmrecs',
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'upload_date': '20231201',
|
||||
}
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6',
|
||||
'title': 'Caveman Star',
|
||||
'track': 'Caveman Star',
|
||||
'ext': 'mp3',
|
||||
'duration': 121,
|
||||
'track_number': 6,
|
||||
'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP',
|
||||
'thumbnail': 'https://www.arweave.net/VyZA6CBeUuqP174khvSrD44Eosi3MLVyWN42uaQKg50',
|
||||
'tags': ['punk', 'postpresentmedium', 'cambridge'],
|
||||
'album_artist': 'Post Present Medium ',
|
||||
'uploader': 'ppmrecs',
|
||||
'timestamp': 1701417610,
|
||||
'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A',
|
||||
'album': 'The Spatulas - March Chant',
|
||||
'channel': 'ppm',
|
||||
'upload_date': '20231201',
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'url': 'https://www.ninaprotocol.com/releases/f-g-s-american-shield',
|
||||
'info_dict': {
|
||||
'id': '76PZnJwaMgViQHYfA4NYJXds7CmW6vHQKAtQUxGene6J',
|
||||
'description': 'md5:63f08d5db558b4b36e1896f317062721',
|
||||
'title': 'F.G.S. - American Shield',
|
||||
'uploader_id': 'Ej3rozs11wYqFk1Gs6oggGCkGLz8GzBhmJfnUxf6gPci',
|
||||
'channel_id': '6JuksCZPXuP16wJ1BUfwuukJzh42C7guhLrFPPkVJfyE',
|
||||
'channel': 'tinkscough',
|
||||
'tags': [],
|
||||
'album_artist': 'F.G.S.',
|
||||
'album': 'F.G.S. - American Shield',
|
||||
'thumbnail': 'https://www.arweave.net/YJpgImkXLT9SbpFb576KuZ5pm6bdvs452LMs3Rx6lm8',
|
||||
'display_id': 'f-g-s-american-shield',
|
||||
'uploader': 'flannerysilva',
|
||||
'timestamp': 1702395858,
|
||||
'upload_date': '20231212',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
'url': 'https://www.ninaprotocol.com/releases/time-to-figure-things-out',
|
||||
'info_dict': {
|
||||
'id': '6Zi1nC5hj6b13NkpxVYwRhFy6mYA7oLBbe9DMrgGDcYh',
|
||||
'display_id': 'time-to-figure-things-out',
|
||||
'description': 'md5:960202ed01c3134bb8958f1008527e35',
|
||||
'timestamp': 1706283607,
|
||||
'title': 'DJ STEPDAD - time to figure things out',
|
||||
'album_artist': 'DJ STEPDAD',
|
||||
'uploader': 'tddvsss',
|
||||
'upload_date': '20240126',
|
||||
'album': 'time to figure things out',
|
||||
'uploader_id': 'AXQNRgTyYsySyAMFDwxzumuGjfmoXshorCesjpquwCBi',
|
||||
'thumbnail': 'https://www.arweave.net/O4i8bcKVqJVZvNeHHFp6r8knpFGh9ZwEgbeYacr4nss',
|
||||
'tags': [],
|
||||
},
|
||||
'playlist_count': 4,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a Nina Protocol release URL into a playlist with one entry per track."""
    video_id = self._match_id(url)
    api_response = self._download_json(
        f'https://api.ninaprotocol.com/v1/releases/{video_id}', video_id)
    release = api_response['release']

    # The URL may carry a slug; prefer the release's public key as the id.
    video_id = release.get('publicKey') or video_id

    # Release-level metadata copied onto the playlist and onto every track.
    common_info = traverse_obj(release, {
        'album': ('metadata', 'properties', 'title', {str}),
        'album_artist': ((('hub', 'data'), 'publisherAccount'), 'displayName', {str}),
        'timestamp': ('datetime', {parse_iso8601}),
        'thumbnail': ('metadata', 'image', {url_or_none}),
        'uploader': ('publisherAccount', 'handle', {str}),
        'uploader_id': ('publisherAccount', 'publicKey', {str}),
        'channel': ('hub', 'handle', {str}),
        'channel_id': ('hub', 'publicKey', {str}),
    }, get_all=False)
    common_info['tags'] = traverse_obj(release, ('metadata', 'properties', 'tags', ..., {str}))

    # Only files with a usable `uri` become tracks; numbering starts at 1.
    tracks = traverse_obj(release, (
        'metadata', 'properties', 'files', lambda _, v: url_or_none(v['uri'])))
    entries = [{
        'id': f'{video_id}_{track_num}',
        'url': track['uri'],
        **traverse_obj(track, {
            'title': ('track_title', {str}),
            'track': ('track_title', {str}),
            'ext': ('type', {mimetype2ext}),
            'track_number': ('track', {int_or_none}),
            'duration': ('duration', {int_or_none}),
        }),
        'vcodec': 'none',
        **common_info,
    } for track_num, track in enumerate(tracks, 1)]

    release_info = traverse_obj(release, {
        'display_id': ('slug', {str}),
        'title': ('metadata', 'name', {str}),
        'description': ('metadata', 'description', {str}),
    })

    return {
        '_type': 'playlist',
        'id': video_id,
        'entries': entries,
        **release_info,
        **common_info,
    }
|
72
yt_dlp/extractor/ninenews.py
Normal file
72
yt_dlp/extractor/ninenews.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
from .common import InfoExtractor
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from ..utils import ExtractorError
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NineNewsIE(InfoExtractor):
    """Extractor for 9news.com.au pages; hands off to the Brightcove extractor."""

    IE_NAME = '9News'
    _VALID_URL = r'https?://(?:www\.)?9news\.com\.au/(?:[\w-]+/){2,3}(?P<id>[\w-]+)/?(?:$|[?#])'
    _TESTS = [
        {
            'url': 'https://www.9news.com.au/videos/national/fair-trading-pulls-dozens-of-toys-from-shelves/clqgc7dvj000y0jnvfism0w5m',
            'md5': 'd1a65b2e9d126e5feb9bc5cb96e62c80',
            'info_dict': {
                'id': '6343717246112',
                'ext': 'mp4',
                'title': 'Fair Trading pulls dozens of toys from shelves',
                'description': 'Fair Trading Australia have been forced to pull dozens of toys from shelves over hazard fears.',
                'thumbnail': 'md5:bdbe44294e2323b762d97acf8843f66c',
                'duration': 93.44,
                'timestamp': 1703231748,
                'upload_date': '20231222',
                'uploader_id': '664969388001',
                'tags': ['networkclip', 'aunews_aunationalninenews', 'christmas presents', 'toys', 'fair trading', 'au_news'],
            },
        },
        {
            'url': 'https://www.9news.com.au/world/tape-reveals-donald-trump-pressured-michigan-officials-not-to-certify-2020-vote-a-new-report-says/0b8b880e-7d3c-41b9-b2bd-55bc7e492259',
            'md5': 'a885c44d20898c3e70e9a53e8188cea1',
            'info_dict': {
                'id': '6343587450112',
                'ext': 'mp4',
                'title': 'Trump found ineligible to run for president by state court',
                'description': 'md5:40e6e7db7a4ac6be0e960569a5af6066',
                'thumbnail': 'md5:3e132c48c186039fd06c10787de9bff2',
                'duration': 104.64,
                'timestamp': 1703058034,
                'upload_date': '20231220',
                'uploader_id': '664969388001',
                'tags': ['networkclip', 'aunews_aunationalninenews', 'ineligible', 'presidential candidate', 'donald trump', 'au_news'],
            },
        },
        {
            'url': 'https://www.9news.com.au/national/outrage-as-parents-banned-from-giving-gifts-to-kindergarten-teachers/e19b49d4-a1a4-4533-9089-6e10e2d9386a',
            'info_dict': {
                'id': '6343716797112',
                'ext': 'mp4',
                'title': 'Outrage as parents banned from giving gifts to kindergarten teachers',
                'description': 'md5:7a8b0ed2f9e08875fd9a3e86e462bc46',
                'thumbnail': 'md5:5ee4d66717bdd0dee9fc9a705ef041b8',
                'duration': 91.307,
                'timestamp': 1703229584,
                'upload_date': '20231222',
                'uploader_id': '664969388001',
                'tags': ['networkclip', 'aunews_aunationalninenews', 'presents', 'teachers', 'kindergarten', 'au_news'],
            },
        },
    ]

    def _real_extract(self, url):
        """Read the page's ``__INITIAL_STATE__`` JSON and delegate to Brightcove.

        Raises ExtractorError when either the video id or the Brightcove
        account cannot be found in the embedded state.
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        state = self._search_json(
            r'var\s+__INITIAL_STATE__\s*=', html, 'initial state', page_id)

        # Two candidate locations for the video id: the video index page
        # state first, then any article media entry whose type is 'video'.
        video_index_path = ('videoIndex', 'currentVideo', 'brightcoveId', {str})
        article_media_path = (
            'article', ..., 'media', lambda _, v: v['type'] == 'video', 'urn', {str})
        brightcove_id = traverse_obj(
            state, video_index_path, article_media_path, get_all=False)

        # The account is looked up directly under 'config' and under 'config' -> 'video'.
        account_path = ('videoIndex', 'config', (None, 'video'), 'account', {str})
        account_id = traverse_obj(state, account_path, get_all=False)

        if not (brightcove_id and account_id):
            raise ExtractorError('Unable to get the required video data')

        player_url = f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={brightcove_id}'
        return self.url_result(player_url, BrightcoveNewIE, brightcove_id)
|
|
@ -135,14 +135,15 @@ class NovaIE(InfoExtractor):
|
|||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '249baab7d0104e186e78b0899c7d5f28',
|
||||
'md5': 'da8f3f1fcdaf9fb0f112a32a165760a3',
|
||||
'info_dict': {
|
||||
'id': '1757139',
|
||||
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
||||
'id': '8OvQqEvV3MW',
|
||||
'display_id': '8OvQqEvV3MW',
|
||||
'ext': 'mp4',
|
||||
'title': 'Podzemní nemocnice v pražské Krči',
|
||||
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
'duration': 151,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||
|
@ -210,7 +211,7 @@ def _real_extract(self, url):
|
|||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media(?:tn)?\.cms\.nova\.cz/embed/([^/?#&"\']+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return {
|
||||
|
|
199
yt_dlp/extractor/nuum.py
Normal file
199
yt_dlp/extractor/nuum.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
UserNotLive,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NuumBaseIE(InfoExtractor):
    """Shared helpers for the nuum.ru extractors: API access and metadata parsing."""

    def _call_api(self, path, video_id, description, query=None):
        """Call ``https://nuum.ru/api/v2/<path>`` and return the ``result`` field.

        ``description`` is only used in the progress and error notes.
        ``query`` is an optional mapping of URL query parameters; ``None``
        (the default) means no parameters. A ``None`` sentinel is used rather
        than a shared mutable ``{}`` default.

        Raises ExtractorError when the response carries a truthy ``error``
        field, and KeyError when the response has no ``result`` key.
        """
        response = self._download_json(
            f'https://nuum.ru/api/v2/{path}', video_id, query=query or {},
            note=f'Downloading {description} metadata',
            errnote=f'Unable to download {description} metadata')
        if error := response.get('error'):
            raise ExtractorError(f'API returned error: {error!r}')
        return response['result']

    def _get_channel_info(self, channel_name):
        """Return the ``broadcasts/public`` API result for ``channel_name``."""
        return self._call_api(
            'broadcasts/public', video_id=channel_name, description='channel',
            query={
                'with_extra': 'true',
                'channel_name': channel_name,
                'with_deleted': 'true',
            })

    def _parse_video_data(self, container, extract_formats=True):
        """Build an info dict from a media container object.

        Only the first stream of the container and the first media entry of
        that stream are inspected. When ``extract_formats`` is false no
        manifest is requested and ``formats`` / ``subtitles`` stay ``None``.

        Raises KeyError if ``container`` has no ``media_container_id``.
        """
        stream = traverse_obj(container, ('media_container_streams', 0, {dict})) or {}
        media = traverse_obj(stream, ('stream_media', 0, {dict})) or {}
        # The archive URL is listed before the plain media URL.
        media_url = traverse_obj(media, (
            'media_meta', ('media_archive_url', 'media_url'), {url_or_none}), get_all=False)

        video_id = str(container['media_container_id'])
        is_live = media.get('media_status') == 'RUNNING'

        formats, subtitles = None, None
        if extract_formats:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', live=is_live)

        # NOTE(review): filter_dict is presumably what strips the None-valued
        # 'formats'/'subtitles' entries when extraction is skipped — confirm.
        return filter_dict({
            'id': video_id,
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(container, {
                'title': ('media_container_name', {str}),
                'description': ('media_container_description', {str}),
                'timestamp': ('created_at', {parse_iso8601}),
                'channel': ('media_container_channel', 'channel_name', {str}),
                'channel_id': ('media_container_channel', 'channel_id', {str_or_none}),
            }),
            **traverse_obj(stream, {
                'view_count': ('stream_total_viewers', {int_or_none}),
                'concurrent_view_count': ('stream_current_viewers', {int_or_none}),
            }),
            **traverse_obj(media, {
                'duration': ('media_duration', {int_or_none}),
                'thumbnail': ('media_meta', ('media_preview_archive_url', 'media_preview_url'), {url_or_none}),
            }, get_all=False),
        })
|
||||
|
||||
|
||||
class NuumMediaIE(NuumBaseIE):
    """Extractor for individual nuum.ru stream, video and clip pages."""

    IE_NAME = 'nuum:media'
    _VALID_URL = r'https?://nuum\.ru/(?:streams|videos|clips)/(?P<id>[\d]+)'
    _TESTS = [
        {
            'url': 'https://nuum.ru/streams/1592713-7-days-to-die',
            'only_matching': True,
        },
        {
            'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
            'md5': 'f1d9118a30403e32b702a204eb03aca3',
            'info_dict': {
                'id': '1567547',
                'ext': 'mp4',
                'title': 'Toxi$ - Hurtz',
                'description': '',
                'timestamp': 1702631651,
                'upload_date': '20231215',
                'thumbnail': r're:^https?://.+\.jpg',
                'view_count': int,
                'concurrent_view_count': int,
                'channel_id': '6911',
                'channel': 'toxis',
                'duration': 116,
            },
        },
        {
            'url': 'https://nuum.ru/clips/1552564-pro-misu',
            'md5': 'b248ae1565b1e55433188f11beeb0ca1',
            'info_dict': {
                'id': '1552564',
                'ext': 'mp4',
                'title': 'Про Мису 🙃',
                'timestamp': 1701971828,
                'upload_date': '20231207',
                'thumbnail': r're:^https?://.+\.jpg',
                'view_count': int,
                'concurrent_view_count': int,
                'channel_id': '3320',
                'channel': 'Misalelik',
                'duration': 41,
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the media container for the numeric id in ``url`` and parse it."""
        media_id = self._match_id(url)
        container = self._call_api(f'media-containers/{media_id}', media_id, 'media')
        return self._parse_video_data(container)
|
||||
|
||||
|
||||
class NuumLiveIE(NuumBaseIE):
    """Extractor for a nuum.ru channel root URL (the channel's current broadcast)."""

    IE_NAME = 'nuum:live'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/?(?:$|[#?])'
    _TESTS = [
        {
            'url': 'https://nuum.ru/channel/mts_live',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the channel's media container info, attributed to NuumMediaIE.

        Raises UserNotLive only when the API explicitly reports
        ``channel_is_live`` as ``False``; a missing flag does not abort.
        """
        channel_name = self._match_id(url)
        channel_info = self._get_channel_info(channel_name)

        live_flag = traverse_obj(channel_info, ('channel', 'channel_is_live'))
        if live_flag is False:
            raise UserNotLive(video_id=channel_name)

        parsed = self._parse_video_data(channel_info['media_container'])
        # Defaults come first so that any same-named key in the parsed info wins.
        result = {
            'webpage_url': f'https://nuum.ru/streams/{parsed["id"]}',
            'extractor_key': NuumMediaIE.ie_key(),
            'extractor': NuumMediaIE.IE_NAME,
        }
        result.update(parsed)
        return result
|
||||
|
||||
|
||||
class NuumTabIE(NuumBaseIE):
    """Extractor for the streams / videos / clips tabs of a nuum.ru channel."""

    IE_NAME = 'nuum:tab'
    _VALID_URL = r'https?://nuum\.ru/channel/(?P<id>[^/#?]+)/(?P<type>streams|videos|clips)'
    _TESTS = [
        {
            'url': 'https://nuum.ru/channel/dankon_/clips',
            'info_dict': {
                'id': 'dankon__clips',
                'title': 'Dankon_',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'https://nuum.ru/channel/dankon_/videos',
            'info_dict': {
                'id': 'dankon__videos',
                'title': 'Dankon_',
            },
            'playlist_mincount': 2,
        },
        {
            'url': 'https://nuum.ru/channel/dankon_/streams',
            'info_dict': {
                'id': 'dankon__streams',
                'title': 'Dankon_',
            },
            'playlist_mincount': 1,
        },
    ]

    _PAGE_SIZE = 50

    def _fetch_page(self, channel_id, tab_type, tab_id, page):
        """Yield url results for one zero-based ``page`` of a channel tab.

        Each entry carries the metadata parsed from the listing itself;
        formats are left for NuumMediaIE to extract.
        """
        CONTAINER_TYPES = {
            'clips': ['SHORT_VIDEO', 'REVIEW_VIDEO'],
            'videos': ['LONG_VIDEO'],
            'streams': ['SINGLE'],
        }

        page_query = {
            'limit': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
            'channel_id': channel_id,
            'media_container_status': 'STOPPED',
            'media_container_type': CONTAINER_TYPES[tab_type],
        }
        listing = self._call_api(
            'media-containers', video_id=tab_id,
            description=f'{tab_type} tab page {page + 1}', query=page_query)

        for item in traverse_obj(listing, (..., {dict})):
            info = self._parse_video_data(item, extract_formats=False)
            entry_url = f'https://nuum.ru/videos/{info["id"]}'
            yield self.url_result(entry_url, NuumMediaIE, **info)

    def _real_extract(self, url):
        """Return a lazily paged playlist for the channel tab named in ``url``."""
        match = self._match_valid_url(url)
        channel_name, tab_type = match.group('id', 'type')
        tab_id = f'{channel_name}_{tab_type}'
        channel_data = self._get_channel_info(channel_name)['channel']

        page_fetcher = functools.partial(
            self._fetch_page, channel_data['channel_id'], tab_type, tab_id)
        entries = OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
        return self.playlist_result(
            entries, playlist_id=tab_id, playlist_title=channel_data.get('channel_name'))
|
|
@ -1,50 +1,93 @@
|
|||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NYTimesBaseIE(InfoExtractor):
|
||||
_SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
|
||||
_DNS_NAMESPACE = uuid.UUID('36dd619a-56dc-595b-9e09-37f4152c7b5d')
|
||||
_TOKEN = 'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuNIzKBOFB77aT/jN/FQ+/QVKWq5V1ka1AYmCR9hstz1pGNPH5ajOU9gAqta0T89iPnhjwla+3oec/Z3kGjxbpv6miQXufHFq3u2RC6HyU458cLat5kVPSOQCe3VVB5NRpOlRuwKHqn0txfxnwSSj8mqzstR997d3gKB//RO9zE16y3PoWlDQXkASngNJEWvL19iob/xwAkfEWCjyRILWFY0JYX3AvLMSbq7wsqOCE5srJpo7rRU32zsByhsp1D5W9OYqqwDmflsgCEQy2vqTsJjrJohuNg+urMXNNZ7Y3naMoqttsGDrWVxtPBafKMI8pM2ReNZBbGQsQXRzQNo7+QIDAQAB'
|
||||
_GRAPHQL_API = 'https://samizdat-graphql.nytimes.com/graphql/v2'
|
||||
_GRAPHQL_QUERY = '''query VideoQuery($id: String!) {
|
||||
video(id: $id) {
|
||||
... on Video {
|
||||
bylines {
|
||||
renderedRepresentation
|
||||
}
|
||||
duration
|
||||
firstPublished
|
||||
promotionalHeadline
|
||||
promotionalMedia {
|
||||
... on Image {
|
||||
crops {
|
||||
name
|
||||
renditions {
|
||||
name
|
||||
width
|
||||
height
|
||||
url
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
renditions {
|
||||
type
|
||||
width
|
||||
height
|
||||
url
|
||||
bitrate
|
||||
}
|
||||
summary
|
||||
}
|
||||
}
|
||||
}'''
|
||||
|
||||
def _extract_video_from_id(self, video_id):
|
||||
# Authorization generation algorithm is reverse engineered from `signer` in
|
||||
# http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
|
||||
path = '/svc/video/api/v3/video/' + video_id
|
||||
hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
|
||||
video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
|
||||
'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
|
||||
'X-NYTV': 'vhs',
|
||||
}, fatal=False)
|
||||
if not video_data:
|
||||
video_data = self._download_json(
|
||||
'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
|
||||
video_id, 'Downloading video JSON')
|
||||
def _call_api(self, media_id):
|
||||
# reference: `id-to-uri.js`
|
||||
video_uuid = uuid.uuid5(self._DNS_NAMESPACE, 'video')
|
||||
media_uuid = uuid.uuid5(video_uuid, media_id)
|
||||
|
||||
title = video_data['headline']
|
||||
return traverse_obj(self._download_json(
|
||||
self._GRAPHQL_API, media_id, 'Downloading JSON from GraphQL API', data=json.dumps({
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {'id': f'nyt://video/{media_uuid}'},
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Nyt-App-Type': 'vhs',
|
||||
'Nyt-App-Version': 'v3.52.21',
|
||||
'Nyt-Token': self._TOKEN,
|
||||
'Origin': 'https://nytimes.com',
|
||||
}, fatal=False), ('data', 'video', {dict})) or {}
|
||||
|
||||
def get_file_size(file_size):
|
||||
if isinstance(file_size, int):
|
||||
return file_size
|
||||
elif isinstance(file_size, dict):
|
||||
return int(file_size.get('value', 0))
|
||||
else:
|
||||
return None
|
||||
def _extract_thumbnails(self, thumbs):
|
||||
return traverse_obj(thumbs, (lambda _, v: url_or_none(v['url']), {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}), default=None)
|
||||
|
||||
def _extract_formats_and_subtitles(self, video_id, content_media_json):
|
||||
urls = []
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for video in video_data.get('renditions', []):
|
||||
for video in traverse_obj(content_media_json, ('renditions', ..., {dict})):
|
||||
video_url = video.get('url')
|
||||
format_id = video.get('type')
|
||||
if not video_url or format_id == 'thumbs' or video_url in urls:
|
||||
|
@ -56,11 +99,9 @@ def get_file_size(file_size):
|
|||
video_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id=format_id or 'hls', fatal=False)
|
||||
formats.extend(m3u8_fmts)
|
||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
||||
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||
elif ext == 'mpd':
|
||||
continue
|
||||
# formats.extend(self._extract_mpd_formats(
|
||||
# video_url, video_id, format_id or 'dash', fatal=False))
|
||||
continue # all mpd urls give 404 errors
|
||||
else:
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
|
@ -68,55 +109,50 @@ def get_file_size(file_size):
|
|||
'vcodec': video.get('videoencoding') or video.get('video_codec'),
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||
'filesize': traverse_obj(video, (
|
||||
('file_size', 'fileSize'), (None, ('value')), {int_or_none}), get_all=False),
|
||||
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
|
||||
'ext': ext,
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
for image in video_data.get('images', []):
|
||||
image_url = image.get('url')
|
||||
if not image_url:
|
||||
continue
|
||||
thumbnails.append({
|
||||
'url': 'http://www.nytimes.com/' + image_url,
|
||||
'width': int_or_none(image.get('width')),
|
||||
'height': int_or_none(image.get('height')),
|
||||
})
|
||||
return formats, subtitles
|
||||
|
||||
publication_date = video_data.get('publication_date')
|
||||
timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
|
||||
def _extract_video(self, media_id):
|
||||
data = self._call_api(media_id)
|
||||
formats, subtitles = self._extract_formats_and_subtitles(media_id, data)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video_data.get('summary'),
|
||||
'timestamp': timestamp,
|
||||
'uploader': video_data.get('byline'),
|
||||
'duration': float_or_none(video_data.get('duration'), 1000),
|
||||
'id': media_id,
|
||||
'title': data.get('promotionalHeadline'),
|
||||
'description': data.get('summary'),
|
||||
'timestamp': parse_iso8601(data.get('firstPublished')),
|
||||
'duration': float_or_none(data.get('duration'), scale=1000),
|
||||
'creator': ', '.join(traverse_obj(data, ( # TODO: change to 'creators'
|
||||
'bylines', ..., 'renderedRepresentation', {lambda x: remove_start(x, 'By ')}))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnails': self._extract_thumbnails(
|
||||
traverse_obj(data, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
||||
|
||||
class NYTimesIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
|
||||
'md5': 'd665342765db043f7e225cff19df0f2d',
|
||||
'md5': 'a553aa344014e3723d33893d89d4defc',
|
||||
'info_dict': {
|
||||
'id': '100000002847155',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'Verbatim: What Is a Photocopier?',
|
||||
'description': 'md5:93603dada88ddbda9395632fdc5da260',
|
||||
'timestamp': 1398631707,
|
||||
'upload_date': '20140427',
|
||||
'uploader': 'Brett Weiner',
|
||||
'timestamp': 1398646132,
|
||||
'upload_date': '20140428',
|
||||
'creator': 'Brett Weiner',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.+\.jpg',
|
||||
'duration': 419,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
|
||||
'only_matching': True,
|
||||
|
@ -125,138 +161,260 @@ class NYTimesIE(NYTimesBaseIE):
|
|||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
return self._extract_video(video_id)
|
||||
|
||||
|
||||
class NYTimesArticleIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
|
||||
_VALID_URL = r'https?://(?:www\.)?nytimes\.com/\d{4}/\d{2}/\d{2}/(?!books|podcasts)[^/?#]+/(?:\w+/)?(?P<id>[^./?#]+)(?:\.html)?'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
|
||||
'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
'id': '100000003628438',
|
||||
'ext': 'mov',
|
||||
'title': 'New Minimum Wage: $70,000 a Year',
|
||||
'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
|
||||
'timestamp': 1429033037,
|
||||
'ext': 'mp4',
|
||||
'title': 'One Company’s New Minimum Wage: $70,000 a Year',
|
||||
'description': 'md5:89ba9ab67ca767bb92bf823d1f138433',
|
||||
'timestamp': 1429047468,
|
||||
'upload_date': '20150414',
|
||||
'uploader': 'Matthew Williams',
|
||||
}
|
||||
'creator': 'Patricia Cohen',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 119.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
|
||||
'md5': 'e0d52040cafb07662acf3c9132db3575',
|
||||
# article with audio and no video
|
||||
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
|
||||
'md5': '2365b3555c8aa7f4dd34ca735ad02e6a',
|
||||
'info_dict': {
|
||||
'id': '100000004709062',
|
||||
'title': 'The Run-Up: ‘He Was Like an Octopus’',
|
||||
'id': '100000009110381',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
|
||||
'series': 'The Run-Up',
|
||||
'episode': '‘He Was Like an Octopus’',
|
||||
'episode_number': 20,
|
||||
'duration': 2130,
|
||||
}
|
||||
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
|
||||
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
|
||||
'timestamp': 1695960700,
|
||||
'upload_date': '20230929',
|
||||
'creator': 'Stephanie Nolen, Natalija Gormalova',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 1322,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
|
||||
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
'id': '100000004709479',
|
||||
'title': 'The Rise of Hitler',
|
||||
'ext': 'mp3',
|
||||
'description': 'md5:bce877fd9e3444990cb141875fab0028',
|
||||
'creator': 'Pamela Paul',
|
||||
'duration': 3475,
|
||||
'id': '100000009202270',
|
||||
'ext': 'mp4',
|
||||
'title': 'Kamala Harris Defends Biden Policies, but Says ‘More Work’ Needed to Reach Voters',
|
||||
'description': 'md5:de4212a7e19bb89e4fb14210ca915f1f',
|
||||
'timestamp': 1701290997,
|
||||
'upload_date': '20231129',
|
||||
'uploader': 'By The New York Times',
|
||||
'creator': 'Katie Rogers',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 97.631,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
|
||||
# multiple videos in the same article
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/air-traffic-controllers-safety.html',
|
||||
'info_dict': {
|
||||
'id': 'air-traffic-controllers-safety',
|
||||
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
||||
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
||||
'upload_date': '20231202',
|
||||
'creator': 'Emily Steel, Sydney Ember',
|
||||
'timestamp': 1701511264,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_podcast_from_json(self, json, page_id, webpage):
|
||||
podcast_audio = self._parse_json(
|
||||
json, page_id, transform_source=js_to_json)
|
||||
def _extract_content_from_block(self, block):
|
||||
details = traverse_obj(block, {
|
||||
'id': ('sourceId', {str}),
|
||||
'uploader': ('bylines', ..., 'renderedRepresentation', {str}),
|
||||
'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))),
|
||||
'timestamp': ('firstPublished', {parse_iso8601}),
|
||||
'series': ('podcastSeries', {str}),
|
||||
}, get_all=False)
|
||||
|
||||
audio_data = podcast_audio['data']
|
||||
track = audio_data['track']
|
||||
|
||||
episode_title = track['title']
|
||||
video_url = track['source']
|
||||
|
||||
description = track.get('description') or self._html_search_meta(
|
||||
['og:description', 'twitter:description'], webpage)
|
||||
|
||||
podcast_title = audio_data.get('podcast', {}).get('title')
|
||||
title = ('%s: %s' % (podcast_title, episode_title)
|
||||
if podcast_title else episode_title)
|
||||
|
||||
episode = audio_data.get('podcast', {}).get('episode') or ''
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
|
||||
formats, subtitles = self._extract_formats_and_subtitles(details.get('id'), block)
|
||||
# audio articles will have an url and no formats
|
||||
url = traverse_obj(block, ('fileUrl', {url_or_none}))
|
||||
if not formats and url:
|
||||
formats.append({'url': url, 'vcodec': 'none'})
|
||||
|
||||
return {
|
||||
'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'creator': track.get('credit'),
|
||||
'series': podcast_title,
|
||||
'episode': episode_title,
|
||||
'episode_number': episode_number,
|
||||
'duration': int_or_none(track.get('duration')),
|
||||
**details,
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
block, ('promotionalMedia', 'crops', ..., 'renditions', ...))),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
art_json = self._search_json(
|
||||
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
|
||||
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-videoid=["\'](\d+)', webpage, 'video id',
|
||||
default=None, fatal=False)
|
||||
if video_id is not None:
|
||||
return self._extract_video_from_id(video_id)
|
||||
blocks = traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
||||
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
||||
if not blocks:
|
||||
raise ExtractorError('Unable to extract any media blocks from webpage')
|
||||
|
||||
podcast_data = self._search_regex(
|
||||
(r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
|
||||
r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
|
||||
webpage, 'podcast data')
|
||||
return self._extract_podcast_from_json(podcast_data, page_id, webpage)
|
||||
common_info = {
|
||||
'title': remove_end(self._html_extract_title(webpage), ' - The New York Times'),
|
||||
'description': traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
|
||||
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
||||
'creator': ', '.join(
|
||||
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
||||
entries = []
|
||||
for block in blocks:
|
||||
entries.append(merge_dicts(self._extract_content_from_block(block), common_info))
|
||||
|
||||
if len(entries) > 1:
|
||||
return self.playlist_result(entries, page_id, **common_info)
|
||||
|
||||
return {
|
||||
'id': page_id,
|
||||
**entries[0],
|
||||
}
|
||||
|
||||
|
||||
class NYTimesCookingIE(NYTimesBaseIE):
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/(?:guid|recip)es/(?P<id>\d+)'
|
||||
IE_NAME = 'NYTimesCookingGuide'
|
||||
_VALID_URL = r'https?://cooking\.nytimes\.com/guides/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
|
||||
'md5': 'dab81fa2eaeb3f9ed47498bdcfcdc1d3',
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'info_dict': {
|
||||
'id': '100000004756089',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1479383008,
|
||||
'uploader': 'By SHAW LASH, ADAM SAEWITZ and JAMES HERRON',
|
||||
'title': 'Cranberry Tart',
|
||||
'upload_date': '20161117',
|
||||
'description': 'If you are a fan of lemon curd or the classic French tarte au citron, you will love this cranberry version.',
|
||||
'id': '13-how-to-cook-a-turkey',
|
||||
'title': 'How to Cook a Turkey',
|
||||
'description': 'md5:726cfd3f9b161bdf5c279879e8050ca0',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# single video example
|
||||
'url': 'https://cooking.nytimes.com/guides/50-how-to-make-mac-and-cheese',
|
||||
'md5': '64415805fe0b8640fce6b0b9def5989a',
|
||||
'info_dict': {
|
||||
'id': '100000005835845',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Make Mac and Cheese',
|
||||
'description': 'md5:b8f2f33ec1fb7523b21367147c9594f1',
|
||||
'timestamp': 1522950315,
|
||||
'upload_date': '20180405',
|
||||
'duration': 9.51,
|
||||
'creator': 'Alison Roman',
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cooking.nytimes.com/guides/13-how-to-cook-a-turkey',
|
||||
'md5': '4b2e8c70530a89b8d905a2b572316eb8',
|
||||
'url': 'https://cooking.nytimes.com/guides/20-how-to-frost-a-cake',
|
||||
'md5': '64415805fe0b8640fce6b0b9def5989a',
|
||||
'info_dict': {
|
||||
'id': '100000003951728',
|
||||
'ext': 'mov',
|
||||
'timestamp': 1445509539,
|
||||
'description': 'Turkey guide',
|
||||
'upload_date': '20151022',
|
||||
'title': 'Turkey',
|
||||
}
|
||||
'id': '20-how-to-frost-a-cake',
|
||||
'title': 'How to Frost a Cake',
|
||||
'description': 'md5:a31fe3b98a8ce7b98aae097730c269cd',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
|
||||
description = self._html_search_meta(['og:description', 'twitter:description'], webpage)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-video-id=["\'](\d+)', webpage, 'video id')
|
||||
lead_video_id = self._search_regex(
|
||||
r'data-video-player-id="(\d+)"></div>', webpage, 'lead video')
|
||||
media_ids = traverse_obj(
|
||||
get_elements_html_by_class('video-item', webpage), (..., {extract_attributes}, 'data-video-id'))
|
||||
|
||||
return self._extract_video_from_id(video_id)
|
||||
if media_ids:
|
||||
media_ids.append(lead_video_id)
|
||||
return self.playlist_result(
|
||||
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
|
||||
|
||||
return {
|
||||
**self._extract_video(lead_video_id),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'creator': self._search_regex( # TODO: change to 'creators'
|
||||
r'<span itemprop="author">([^<]+)</span></p>', webpage, 'author', default=None),
|
||||
}
|
||||
|
||||
|
||||
class NYTimesCookingRecipeIE(InfoExtractor):
    # Matches recipe pages on cooking.nytimes.com; the leading digits of the
    # URL slug are captured as the page id.
    _VALID_URL = r'https?://cooking\.nytimes\.com/recipes/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://cooking.nytimes.com/recipes/1017817-cranberry-curd-tart',
        'md5': '579e83bbe8e61e9de67f80edba8a78a8',
        'info_dict': {
            'id': '1017817',
            'ext': 'mp4',
            'title': 'Cranberry Curd Tart',
            'description': 'md5:ad77a3fc321db636256d4343c5742152',
            'timestamp': 1447804800,
            'upload_date': '20151118',
            'creator': 'David Tanis',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }, {
        'url': 'https://cooking.nytimes.com/recipes/1024781-neapolitan-checkerboard-cookies',
        'md5': '58df35998241dcf0620e99e646331b42',
        'info_dict': {
            'id': '1024781',
            'ext': 'mp4',
            'title': 'Neapolitan Checkerboard Cookies',
            'description': 'md5:ba12394c585ababea951cb6d2fcc6631',
            'timestamp': 1701302400,
            'upload_date': '20231130',
            'creator': 'Sue Li',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }, {
        'url': 'https://cooking.nytimes.com/recipes/1019516-overnight-oats',
        'md5': '2fe7965a3adc899913b8e25ada360823',
        'info_dict': {
            'id': '1019516',
            'ext': 'mp4',
            'timestamp': 1546387200,
            'description': 'md5:8856ce10239161bd2596ac335b9f9bfb',
            'upload_date': '20190102',
            'title': 'Overnight Oats',
            'creator': 'Genevieve Ko',
            'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the recipe object embedded in the page's Next.js data."""
        recipe_id = self._match_id(url)
        html = self._download_webpage(url, recipe_id)
        next_data = self._search_nextjs_data(html, recipe_id)
        # Hard lookups on purpose: a page without this structure raises
        recipe = next_data['props']['pageProps']['recipe']

        # 'videoSrc' is handed to the m3u8 extractor as the HLS manifest URL
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            recipe['videoSrc'], recipe_id, 'mp4', m3u8_id='hls')

        # Optional metadata; keys whose traversal yields nothing are omitted
        metadata = traverse_obj(recipe, {
            'id': ('id', {str_or_none}),
            'title': ('title', {str}),
            'description': ('topnote', {clean_html}),
            'timestamp': ('publishedAt', {int_or_none}),
            'creator': ('contentAttribution', 'cardByline', {str}),
        })
        thumbnail_urls = traverse_obj(
            recipe, ('image', 'crops', 'recipe', ..., {url_or_none}))

        return {
            **metadata,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': [{'url': thumbnail_url} for thumbnail_url in thumbnail_urls],
        }
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import make_archive_id
|
||||
|
||||
|
||||
class OneFootballIE(InfoExtractor):
|
||||
|
@ -7,41 +9,43 @@ class OneFootballIE(InfoExtractor):
|
|||
_TESTS = [{
|
||||
'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
|
||||
'info_dict': {
|
||||
'id': '34012334',
|
||||
'id': 'Y2VtcWAT',
|
||||
'ext': 'mp4',
|
||||
'title': 'Highlights: FC Zürich 3-3 FC Basel',
|
||||
'description': 'md5:33d9855cb790702c4fe42a513700aba8',
|
||||
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334',
|
||||
'timestamp': 1635874604,
|
||||
'upload_date': '20211102'
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/Y2VtcWAT/poster.jpg?width=720',
|
||||
'timestamp': 1635874895,
|
||||
'upload_date': '20211102',
|
||||
'duration': 375.0,
|
||||
'tags': ['Football', 'Soccer', 'OneFootball'],
|
||||
'_old_archive_ids': ['onefootball 34012334'],
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020',
|
||||
'info_dict': {
|
||||
'id': '34041020',
|
||||
'id': 'leVJrMho',
|
||||
'ext': 'mp4',
|
||||
'title': 'Klopp fumes at VAR decisions in West Ham defeat',
|
||||
'description': 'md5:9c50371095a01ad3f63311c73d8f51a5',
|
||||
'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020',
|
||||
'timestamp': 1636314103,
|
||||
'upload_date': '20211107'
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/leVJrMho/poster.jpg?width=720',
|
||||
'timestamp': 1636315232,
|
||||
'upload_date': '20211107',
|
||||
'duration': 93.0,
|
||||
'tags': ['Football', 'Soccer', 'OneFootball'],
|
||||
'_old_archive_ids': ['onefootball 34041020'],
|
||||
},
|
||||
'params': {'skip_download': True}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
data_json = self._search_json_ld(webpage, id)
|
||||
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
|
||||
return {
|
||||
'id': id,
|
||||
'title': data_json.get('title'),
|
||||
'description': data_json.get('description'),
|
||||
'thumbnail': data_json.get('thumbnail'),
|
||||
'timestamp': data_json.get('timestamp'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data_json = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
data_json.pop('url', None)
|
||||
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/\w+\.m3u8)', webpage, 'm3u8_url')
|
||||
|
||||
return self.url_result(
|
||||
m3u8_url, JWPlatformIE, video_id, _old_archive_ids=[make_archive_id(self, video_id)],
|
||||
**data_json, url_transparent=True)
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
|
||||
|
||||
class OpenRecBaseIE(InfoExtractor):
|
||||
_M3U8_HEADERS = {'Referer': 'https://www.openrec.tv/'}
|
||||
|
||||
def _extract_pagestore(self, webpage, video_id):
|
||||
return self._parse_json(
|
||||
self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
|
||||
|
@ -21,7 +23,7 @@ def _expand_media(self, video_id, media):
|
|||
if not m3u8_url:
|
||||
continue
|
||||
yield from self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id=name)
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id=name, headers=self._M3U8_HEADERS)
|
||||
|
||||
def _extract_movie(self, webpage, video_id, name, is_live):
|
||||
window_stores = self._extract_pagestore(webpage, video_id)
|
||||
|
@ -60,6 +62,7 @@ def _extract_movie(self, webpage, video_id, name, is_live):
|
|||
'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
|
||||
'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
|
||||
'is_live': is_live,
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
@ -110,7 +113,7 @@ def _real_extract(self, url):
|
|||
raise ExtractorError('Cannot extract title')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
capture_data.get('source'), video_id, ext='mp4')
|
||||
capture_data.get('source'), video_id, ext='mp4', headers=self._M3U8_HEADERS)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -121,6 +124,7 @@ def _real_extract(self, url):
|
|||
'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
|
||||
'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
|
||||
'upload_date': unified_strdate(capture_data.get('createdAt')),
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import base64
|
||||
import functools
|
||||
import re
|
||||
|
||||
|
@ -565,3 +566,66 @@ def _real_extract(self, url):
|
|||
})
|
||||
|
||||
return self.playlist_result(entries)
|
||||
|
||||
|
||||
class ORFONIE(InfoExtractor):
    IE_NAME = 'orf:on'
    # 'id' is the 8-digit number in the path, 'slug' the trailing path segment
    _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d{8})/(?P<slug>[\w-]+)'
    _TESTS = [{
        'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
        'info_dict': {
            'id': '14210000',
            'ext': 'mp4',
            'duration': 2651.08,
            'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0167/98/thumb_16697671_segments_highlight_teaser.jpeg',
            'title': 'School of Champions (4/8)',
            'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
            'media_type': 'episode',
            'timestamp': 1706472362,
            'upload_date': '20240128',
        }
    }]

    def _extract_video(self, video_id, display_id):
        """Query the episode API and return id, formats, subtitles and basic metadata."""
        # The endpoint is addressed by the base64 of a fixed prefix followed by the video id
        plain_id = f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()
        encrypted_id = base64.b64encode(plain_id).decode()
        episode = self._download_json(
            f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id)

        formats, subtitles = [], {}
        for manifest_type in traverse_obj(episode, ('sources', {dict.keys}, ...)):
            # Only HLS and DASH sources are handled; every other type is ignored
            if manifest_type not in ('hls', 'dash'):
                continue
            manifest_urls = traverse_obj(
                episode, ('sources', manifest_type, ..., 'src', {url_or_none}))
            for manifest_url in manifest_urls:
                if manifest_type == 'hls':
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        manifest_url, display_id, fatal=False, m3u8_id='hls')
                else:
                    fmts, subs = self._extract_mpd_formats_and_subtitles(
                        manifest_url, display_id, fatal=False, mpd_id='dash')
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)

        # get_all=False: for the alternative keys the first match wins
        metadata = traverse_obj(episode, {
            'duration': ('duration_second', {float_or_none}),
            'title': (('title', 'headline'), {str}),
            'description': (('description', 'teaser_text'), {str}),
            'media_type': ('video_type', {str}),
        }, get_all=False)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **metadata,
        }

    def _real_extract(self, url):
        """Merge webpage metadata with the API result; later sources override earlier ones."""
        video_id, display_id = self._match_valid_url(url).group('id', 'slug')
        webpage = self._download_webpage(url, display_id)

        # Evaluated in increasing priority: meta tags, then JSON-LD, then the API data
        meta_title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None)
        meta_description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage, default=None)
        json_ld_info = self._search_json_ld(webpage, display_id, fatal=False)
        api_info = self._extract_video(video_id, display_id)

        return {
            'id': video_id,
            'title': meta_title,
            'description': meta_description,
            **json_ld_info,
            **api_info,
        }
|
||||
|
|
|
@ -275,7 +275,7 @@ def _real_extract(self, url):
|
|||
'ext': ext,
|
||||
'url': post_file['url'],
|
||||
}
|
||||
elif name == 'video':
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
return {
|
||||
**info,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -12,7 +12,7 @@
|
|||
|
||||
class PiaproIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'piapro'
|
||||
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>\w+)/?'
|
||||
_VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://piapro.jp/t/NXYR',
|
||||
'md5': 'f7c0f760913fb1d44a1c45a4af793909',
|
||||
|
@ -49,6 +49,9 @@ class PiaproIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://piapro.jp/content/hcw0z3a169wtemz6',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://piapro.jp/t/-SO-',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
_login_status = False
|
||||
|
|
|
@ -1,10 +1,18 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'playsuisse'
|
||||
_VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
|
@ -134,12 +142,47 @@ class PlaySuisseIE(InfoExtractor):
|
|||
id
|
||||
url
|
||||
}'''
|
||||
_LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
|
||||
_LOGIN_PATH = 'B2C_1A__SignInV2'
|
||||
_ID_TOKEN = None
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in with username/password and store the resulting ID token on self._ID_TOKEN.

    Raises ExtractorError when the credentials are rejected or when no
    id_token can be read from the final URL.
    """
    login_page = self._download_webpage(
        'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
        query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
    # The login page embeds a SETTINGS object that holds the CSRF token and transaction id
    settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)

    csrf_token = settings['csrf']
    query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}

    credentials = urlencode_postdata({
        'request_type': 'RESPONSE',
        'signInName': username,
        'password': password
    })
    # HTTP 400 is accepted so that the JSON body of a rejected login can be inspected
    login_response = self._download_json(
        f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
        query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=credentials,
        expected_status=400)
    status = traverse_obj(login_response, ('status', {int_or_none}))
    if status == 400:
        raise ExtractorError('Invalid username or password', expected=True)

    confirm_query = {
        'rememberMe': 'false',
        'csrf_token': csrf_token,
        **query,
        'diags': '',
    }
    urlh = self._request_webpage(
        f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
        None, 'Downloading ID token', query=confirm_query)

    # The token is read from the query string of the URL the request ended up at
    self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
    if not self._ID_TOKEN:
        raise ExtractorError('Login failed')
|
||||
|
||||
def _get_media_data(self, media_id):
|
||||
# NOTE In the web app, the "locale" header is used to switch between languages,
|
||||
# However this doesn't seem to take effect when passing the header here.
|
||||
response = self._download_json(
|
||||
'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
|
||||
'https://www.playsuisse.ch/api/graphql',
|
||||
media_id, data=json.dumps({
|
||||
'operationName': 'AssetWatch',
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
|
@ -150,6 +193,9 @@ def _get_media_data(self, media_id):
|
|||
return response['data']['assetV2']
|
||||
|
||||
def _real_extract(self, url):
|
||||
if not self._ID_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
media_id = self._match_id(url)
|
||||
media_data = self._get_media_data(media_id)
|
||||
info = self._extract_single(media_data)
|
||||
|
@ -168,7 +214,8 @@ def _extract_single(self, media_data):
|
|||
if not media.get('url') or media.get('type') != 'HLS':
|
||||
continue
|
||||
f, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
|
||||
media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
|
||||
formats.extend(f)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
|
|
|
@ -87,8 +87,8 @@ def _login(self, host):
|
|||
|
||||
def is_logged(webpage):
|
||||
return any(re.search(p, webpage) for p in (
|
||||
r'class=["\']signOut',
|
||||
r'>Sign\s+[Oo]ut\s*<'))
|
||||
r'id="profileMenuDropdown"',
|
||||
r'class="ph-icon-logout"'))
|
||||
|
||||
if is_logged(login_page):
|
||||
self._logged_in = True
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
class Pr0grammIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)'
|
||||
_TESTS = [{
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/video/5466437',
|
||||
'info_dict': {
|
||||
'id': '5466437',
|
||||
|
@ -36,7 +35,6 @@ class Pr0grammIE(InfoExtractor):
|
|||
'_old_archive_ids': ['pr0grammstatic 5466437'],
|
||||
},
|
||||
}, {
|
||||
# Tags require account
|
||||
'url': 'https://pr0gramm.com/new/3052805:comment28391322',
|
||||
'info_dict': {
|
||||
'id': '3052805',
|
||||
|
@ -71,6 +69,23 @@ class Pr0grammIE(InfoExtractor):
|
|||
'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5848332'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/top/5895149',
|
||||
'info_dict': {
|
||||
'id': '5895149',
|
||||
'ext': 'mp4',
|
||||
'title': 'pr0gramm-5895149 by algoholigSeeManThrower',
|
||||
'tags': 'count:19',
|
||||
'uploader': 'algoholigSeeManThrower',
|
||||
'uploader_id': 457556,
|
||||
'upload_timestamp': 1697580902,
|
||||
'upload_date': '20231018',
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg',
|
||||
'_old_archive_ids': ['pr0grammstatic 5895149'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://pr0gramm.com/static/5466437',
|
||||
'only_matching': True,
|
||||
|
@ -92,15 +107,15 @@ def _is_logged_in(self):
|
|||
def _maximum_flags(self):
|
||||
# We need to guess the flags for the content otherwise the api will raise an error
|
||||
# We can guess the maximum allowed flags for the account from the cookies
|
||||
# Bitflags are (msbf): nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b0001
|
||||
# Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw
|
||||
flags = 0b10001
|
||||
if self._is_logged_in:
|
||||
flags |= 0b1000
|
||||
flags |= 0b01000
|
||||
cookies = self._get_cookies(self.BASE_URL)
|
||||
if 'me' not in cookies:
|
||||
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
|
||||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
|
||||
flags |= 0b0110
|
||||
flags |= 0b00110
|
||||
|
||||
return flags
|
||||
|
||||
|
@ -134,14 +149,12 @@ def _real_extract(self, url):
|
|||
if not source or not source.endswith('mp4'):
|
||||
self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id)
|
||||
|
||||
tags = None
|
||||
if self._is_logged_in:
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags')
|
||||
tags = traverse_obj(metadata, ('tags', ..., 'tag', {str}))
|
||||
# Sorted by "confidence", higher confidence = earlier in list
|
||||
confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float})))
|
||||
if confidences:
|
||||
tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)]
|
||||
|
||||
formats = traverse_obj(video_info, ('variants', ..., {
|
||||
'format_id': ('name', {str}),
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, traverse_obj, try_call
|
||||
from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PrankCastIE(InfoExtractor):
|
||||
|
@ -64,3 +67,71 @@ def _real_extract(self, url):
|
|||
'categories': [json_info.get('broadcast_category')],
|
||||
'tags': try_call(lambda: json_info['broadcast_tags'].split(','))
|
||||
}
|
||||
|
||||
|
||||
class PrankCastPostIE(InfoExtractor):
    # 'id' is the numeric prefix of the post slug, 'display_id' the remainder
    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
        'info_dict': {
            'id': '6214',
            'ext': 'mp3',
            'title': 'Happy National Rachel Day!',
            'display_id': 'happy-national-rachel-day-',
            'timestamp': 1704333938,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 13175,
            'cast': ['Devonanustart'],
            'description': '',
            'categories': ['prank call'],
            'upload_date': '20240104'
        }
    }, {
        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
        'info_dict': {
            'id': '6217',
            'ext': 'mp3',
            'title': 'Jake the Work Crow!',
            'display_id': 'jake-the-work-crow-',
            'timestamp': 1704346592,
            'uploader': 'despicabledogs',
            'channel_id': '957',
            'duration': 263.287,
            'cast': ['despicabledogs'],
            'description': 'https://imgur.com/a/vtxLvKU',
            'categories': [],
            'upload_date': '20240104'
        }
    }]

    def _real_extract(self, url):
        """Extract a post from the Next.js data embedded in the post page.

        The first element of the post's 'post_contents_json' provides the
        media URL, dates, duration, category and guest list.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
        # 'post_contents_json' is a JSON string; only its first element is used
        content = self._parse_json(post['post_contents_json'], video_id)[0]

        uploader = post.get('user_name')
        # 'guests_json' is itself JSON-encoded; fall back to {} when absent or not a dict
        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}

        return {
            'id': video_id,
            'title': post.get('post_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': content.get('url'),
            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
            'uploader': uploader,
            'channel_id': str_or_none(post.get('user_id')),
            'duration': float_or_none(content.get('duration')),
            'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))),
            'description': post.get('post_body'),
            'categories': list(filter(None, [content.get('category')])),
            # filter(None, ...) drops empty entries. The previous filter('', ...) used a
            # string as the predicate, which raises TypeError on the first element, so
            # try_call always produced None for the tags.
            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
            'subtitles': {
                'live_chat': [{
                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
                    'ext': 'json',
                }],
            } if post.get('content_id') else None
        }
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import base64
|
||||
import random
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -11,6 +12,7 @@
|
|||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RadikoBaseIE(InfoExtractor):
|
||||
|
@ -159,6 +161,12 @@ def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token,
|
|||
|
||||
return formats
|
||||
|
||||
def _extract_performers(self, prog):
    """Return the performers found in the programme's `pfm` text as one comma-separated string, or None."""
    def split_names(text):
        # One pfm text may list several names separated by slashes, commas or spaces
        return re.split(r'[//、 ,,]', text)

    performers = traverse_obj(
        prog, ('pfm/text()', ..., {split_names}, ..., {str.strip}))
    # TODO: change 'artist' fields to 'artists' and return traversal list instead of str
    joined = ', '.join(performers)
    return joined or None
|
||||
|
||||
|
||||
class RadikoIE(RadikoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
|
||||
|
@ -186,10 +194,12 @@ def _real_extract(self, url):
|
|||
return {
|
||||
'id': video_id,
|
||||
'title': try_call(lambda: prog.find('title').text),
|
||||
'artist': self._extract_performers(prog),
|
||||
'description': clean_html(try_call(lambda: prog.find('info').text)),
|
||||
'uploader': try_call(lambda: station_program.find('.//name').text),
|
||||
'uploader_id': station,
|
||||
'timestamp': vid_int,
|
||||
'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)),
|
||||
'is_live': True,
|
||||
'formats': self._extract_formats(
|
||||
video_id=video_id, station=station, is_onair=False,
|
||||
|
@ -243,6 +253,7 @@ def _real_extract(self, url):
|
|||
return {
|
||||
'id': station,
|
||||
'title': title,
|
||||
'artist': self._extract_performers(prog),
|
||||
'description': description,
|
||||
'uploader': station_name,
|
||||
'uploader_id': station,
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
|
@ -91,7 +92,7 @@ def fix_cdata(s):
|
|||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
|
||||
|
||||
if not audio_only and not is_live:
|
||||
formats.extend(self._create_http_urls(media_url, relinker_url, formats))
|
||||
formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id))
|
||||
|
||||
return filter_dict({
|
||||
'is_live': is_live,
|
||||
|
@ -99,7 +100,7 @@ def fix_cdata(s):
|
|||
'formats': formats,
|
||||
})
|
||||
|
||||
def _create_http_urls(self, manifest_url, relinker_url, fmts):
|
||||
def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id):
|
||||
_MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8'
|
||||
_MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
|
||||
_QUALITY = {
|
||||
|
@ -166,6 +167,14 @@ def get_format_info(tbr):
|
|||
'fps': 25,
|
||||
}
|
||||
|
||||
# Check if MP4 download is available
|
||||
try:
|
||||
self._request_webpage(
|
||||
HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability')
|
||||
except ExtractorError as e:
|
||||
self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}')
|
||||
return []
|
||||
|
||||
# filter out single-stream formats
|
||||
fmts = [f for f in fmts
|
||||
if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none']
|
||||
|
|
135
yt_dlp/extractor/redge.py
Normal file
135
yt_dlp/extractor/redge.py
Normal file
|
@ -0,0 +1,135 @@
|
|||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RedCDNLivxIE(InfoExtractor):
    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    # Known methods (first in url path):
    # - `livedash` - DASH MPD
    # - `livehls` - HTTP Live Streaming
    # - `livess` - IIS Smooth Streaming
    # - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    # - `sc` - shoutcast/icecast (audio streams, like radio)

    def _real_extract(self, url):
        """Probe the stream under each known delivery method and collect all formats found."""
        tenant, path = self._match_valid_url(url).group('tenant', 'id')
        url_query = parse_qs(url)
        start_time = traverse_obj(url_query, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(url_query, ('stopTime', 0, {int_or_none}))

        # Manifest path appended after `.livx` for the methods that need one
        manifest_suffixes = {
            'livess': '/manifest',
            'livehls': '/playlist.m3u8',
        }

        def livx_mode(mode):
            # Rebuild the stream URL for the given method, carrying over the time window
            params = {}
            if start_time:
                params['startTime'] = start_time
            if stop_time:
                params['stopTime'] = stop_time
            if mode == 'nvr':
                params['nolimit'] = 1
            elif mode != 'sc':
                params['indexMode'] = 'true'
            suffix = manifest_suffixes.get(mode, '')
            return update_url_query(
                f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', params)

        # no id or title for a transmission. making ones up.
        title = path
        for fragment in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(fragment, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []
        ism_doc = None
        # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata
        ism_result = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_result is not False:
            ism_doc, ism_urlh = ism_result
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # Any HTTP status is tolerated here; only a 200 response is turned into a format
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            formats.append({
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            })

        dash_formats = self._extract_mpd_formats(
            livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        hls_formats = self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)
        formats.extend(hls_formats)

        # Duration is scaled by the manifest's TimeScale, defaulting to 10000000
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        scale_duration = functools.partial(float_or_none, scale=time_scale)
        duration = traverse_obj(ism_doc, ('@Duration', {scale_duration})) or None

        if traverse_obj(ism_doc, '@IsLive') == 'TRUE':
            live_status = 'is_live'
        else:
            live_status = 'was_live' if duration else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }
|
|
@ -7,11 +7,12 @@
|
|||
str_to_int,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class RedTubeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.redtube.com/38864951',
|
||||
|
@ -34,6 +35,9 @@ class RedTubeIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'http://it.redtube.com/66418',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.redtube.com.br/103224331',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -79,7 +83,7 @@ def _real_extract(self, url):
|
|||
'media definitions', default='{}'),
|
||||
video_id, fatal=False)
|
||||
for media in medias if isinstance(medias, list) else []:
|
||||
format_url = url_or_none(media.get('videoUrl'))
|
||||
format_url = urljoin('https://www.redtube.com', media.get('videoUrl'))
|
||||
if not format_url:
|
||||
continue
|
||||
format_id = media.get('format')
|
||||
|
|
|
@ -1,8 +1,34 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import format_field, parse_iso8601
|
||||
from ..utils import (
|
||||
MEDIA_EXTENSIONS,
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class RinseFMIE(InfoExtractor):
|
||||
class RinseFMBaseIE(InfoExtractor):
    """Common base for the Rinse FM extractors; holds the shared entry parser."""

    @staticmethod
    def _parse_entry(entry):
        """Turn one Rinse FM episode object into an info dict.

        Fields are pulled out of *entry* with ``traverse_obj``. The entry is
        always attributed to ``RinseFMIE`` (both ``extractor_key`` and
        ``extractor``) and marked as having no video codec.
        """
        # Mapping of info-dict field -> traversal path inside the episode object
        field_paths = {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'url': ('fileUrl', {url_or_none}),
            'release_timestamp': ('episodeDate', {parse_iso8601}),
            # The `x and ...` form leaves a falsy value as-is rather than
            # formatting it into a URL
            'thumbnail': (
                'featuredImage', 0, 'filename', {str},
                {lambda filename: filename and f'https://rinse.imgix.net/media/{filename}'}),
            'webpage_url': (
                'slug', {str},
                {lambda slug: slug and f'https://rinse.fm/episodes/{slug}'}),
        }
        extracted = traverse_obj(entry, field_paths)

        return {
            **extracted,
            'vcodec': 'none',
            'extractor_key': RinseFMIE.ie_key(),
            'extractor': RinseFMIE.IE_NAME,
        }
|
||||
|
||||
|
||||
class RinseFMIE(RinseFMBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/',
|
||||
|
@ -22,12 +48,42 @@ def _real_extract(self, url):
|
|||
webpage = self._download_webpage(url, display_id)
|
||||
entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry']
|
||||
|
||||
return {
|
||||
'id': entry['id'],
|
||||
'title': entry.get('title'),
|
||||
'url': entry['fileUrl'],
|
||||
'vcodec': 'none',
|
||||
'release_timestamp': parse_iso8601(entry.get('episodeDate')),
|
||||
'thumbnail': format_field(
|
||||
entry, [('featuredImage', 0, 'filename')], 'https://rinse.imgix.net/media/%s', default=None),
|
||||
}
|
||||
return self._parse_entry(entry)
|
||||
|
||||
|
||||
class RinseFMArtistPlaylistIE(RinseFMBaseIE):
    """Playlist extractor for Rinse FM show pages (``rinse.fm/shows/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://rinse.fm/shows/resources/',
        'info_dict': {
            'id': 'resources',
            'title': '[re]sources',
            'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.',
        },
        'playlist_mincount': 40,
    }, {
        'url': 'https://rinse.fm/shows/ivy/',
        'info_dict': {
            'id': 'ivy',
            'title': '[IVY]',
            'description': 'A dedicated space for DNB/Turbo House and 4x4.',
        },
        'playlist_mincount': 7,
    }]

    def _entries(self, data):
        """Yield a parsed info dict for every episode whose file is audio.

        Episodes are read from ``props.pageProps.episodes`` of *data* and kept
        only when the extension of their ``fileUrl`` is listed in
        ``MEDIA_EXTENSIONS.audio``.
        """
        audio_episodes_path = (
            'props', 'pageProps', 'episodes',
            lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio,
        )
        yield from map(self._parse_entry, traverse_obj(data, audio_episodes_path))

    def _real_extract(self, url):
        """Download the show page and return its episodes as a playlist."""
        show_id = self._match_id(url)
        page = self._download_webpage(url, show_id)

        # Prefer OpenGraph metadata, falling back to the plain <meta> tags
        show_title = self._og_search_title(page) or self._html_search_meta('title', page)
        show_description = (
            self._og_search_description(page)
            or self._html_search_meta('description', page))
        nextjs_data = self._search_nextjs_data(page, show_id)

        return self.playlist_result(
            self._entries(nextjs_data), show_id, show_title,
            description=show_description)
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue