Merge branch 'master' into bili-legacy

This commit is contained in:
c-basalt 2024-01-30 23:18:57 -05:00
commit 8e67c2837c
333 changed files with 11725 additions and 13639 deletions

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting that yt-dlp is broken on a **supported** site
required: true
- label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -61,19 +61,18 @@ body:
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.10.13, Current version: 2023.10.13
yt-dlp is up to date (2023.10.13)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell
validations:

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting a new site support request
required: true
- label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -73,19 +73,18 @@ body:
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.10.13, Current version: 2023.10.13
yt-dlp is up to date (2023.10.13)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell
validations:

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm requesting a site-specific feature
required: true
- label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -69,19 +69,18 @@ body:
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.10.13, Current version: 2023.10.13
yt-dlp is up to date (2023.10.13)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell
validations:

View file

@ -18,7 +18,7 @@ body:
options:
- label: I'm reporting a bug unrelated to a specific site
required: true
- label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true
@ -54,19 +54,18 @@ body:
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.10.13, Current version: 2023.10.13
yt-dlp is up to date (2023.10.13)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell
validations:

View file

@ -20,7 +20,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
required: true
@ -50,18 +50,17 @@ body:
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.10.13, Current version: 2023.10.13
yt-dlp is up to date (2023.10.13)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell

View file

@ -26,7 +26,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **2023.10.13** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
required: true
@ -56,18 +56,17 @@ body:
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.10.13 [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.10.13, Current version: 2023.10.13
yt-dlp is up to date (2023.10.13)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell

View file

@ -12,7 +12,7 @@ body:
options:
- label: I'm reporting that yt-dlp is broken on a **supported** site
required: true
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true

View file

@ -12,7 +12,7 @@ body:
options:
- label: I'm reporting a new site support request
required: true
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true

View file

@ -12,7 +12,7 @@ body:
options:
- label: I'm requesting a site-specific feature
required: true
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true

View file

@ -12,7 +12,7 @@ body:
options:
- label: I'm reporting a bug unrelated to a specific site
required: true
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true

View file

@ -14,7 +14,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
required: true

View file

@ -20,7 +20,7 @@ body:
required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true
- label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
- label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
required: true

View file

@ -40,10 +40,4 @@ ### What is the purpose of your *pull request*?
- [ ] Core bug fix/improvement
- [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes))
<!-- Do NOT edit/remove anything below this! -->
</details><details><summary>Copilot Summary</summary>
copilot:all
</details>

View file

@ -30,6 +30,10 @@ on:
meta_files:
default: true
type: boolean
origin:
required: false
default: ''
type: string
secrets:
GPG_SIGNING_KEY:
required: false
@ -37,11 +41,13 @@ on:
workflow_dispatch:
inputs:
version:
description: Version tag (YYYY.MM.DD[.REV])
description: |
VERSION: yyyy.mm.dd[.rev] or rev
required: true
type: string
channel:
description: Update channel (stable/nightly/...)
description: |
SOURCE of this build's updates: stable/nightly/master/<repo>
required: true
default: stable
type: string
@ -73,16 +79,34 @@ on:
description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
default: true
type: boolean
origin:
description: Origin
required: false
default: 'current repo'
type: choice
options:
- 'current repo'
permissions:
contents: read
jobs:
process:
runs-on: ubuntu-latest
outputs:
origin: ${{ steps.process_origin.outputs.origin }}
steps:
- name: Process origin
id: process_origin
run: |
echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"
unix:
needs: process
if: inputs.unix
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.10"
@ -96,22 +120,21 @@ jobs:
auto-activate-base: false
- name: Install Requirements
run: |
sudo apt-get -y install zip pandoc man sed
python -m pip install -U pip setuptools wheel
python -m pip install -U Pyinstaller -r requirements.txt
sudo apt -y install zip pandoc man sed
reqs=$(mktemp)
cat > $reqs << EOF
cat > "$reqs" << EOF
python=3.10.*
pyinstaller
cffi
brotli-python
secretstorage
EOF
sed '/^brotli.*/d' requirements.txt >> $reqs
mamba create -n build --file $reqs
sed -E '/^(brotli|secretstorage).*/d' requirements.txt >> "$reqs"
mamba create -n build --file "$reqs"
- name: Prepare
run: |
python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python devscripts/make_lazy_extractors.py
- name: Build Unix platform-independent binary
run: |
@ -150,6 +173,7 @@ jobs:
yt-dlp_linux.zip
linux_arm:
needs: process
if: inputs.linux_arm
permissions:
contents: read
@ -162,7 +186,7 @@ jobs:
- aarch64
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
path: ./repo
- name: Virtualized Install, Prepare & Build
@ -180,12 +204,12 @@ jobs:
apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip
python3.8 -m pip install -U pip setuptools wheel
# Cannot access requirements.txt from the repo directory at this stage
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage
run: |
cd repo
python3.8 -m pip install -U Pyinstaller -r requirements.txt # Cached version may be out of date
python3.8 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
python3.8 -m pip install -U Pyinstaller secretstorage -r requirements.txt # Cached version may be out of date
python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python3.8 devscripts/make_lazy_extractors.py
python3.8 pyinst.py
@ -206,11 +230,12 @@ jobs:
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
macos:
needs: process
if: inputs.macos
runs-on: macos-11
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
# NB: Building universal2 does not work with python from actions/setup-python
- name: Install Requirements
run: |
@ -221,7 +246,7 @@ jobs:
- name: Prepare
run: |
python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python3 devscripts/make_lazy_extractors.py
- name: Build
run: |
@ -247,11 +272,12 @@ jobs:
dist/yt-dlp_macos.zip
macos_legacy:
needs: process
if: inputs.macos_legacy
runs-on: macos-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install Python
# We need the official Python, because the GA ones only support newer macOS versions
env:
@ -272,7 +298,7 @@ jobs:
- name: Prepare
run: |
python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python3 devscripts/make_lazy_extractors.py
- name: Build
run: |
@ -296,11 +322,12 @@ jobs:
dist/yt-dlp_macos_legacy
windows:
needs: process
if: inputs.windows
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with: # 3.8 is used for Win7 support
python-version: "3.8"
@ -311,7 +338,7 @@ jobs:
- name: Prepare
run: |
python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python devscripts/make_lazy_extractors.py
- name: Build
run: |
@ -343,14 +370,15 @@ jobs:
dist/yt-dlp_win.zip
windows32:
needs: process
if: inputs.windows32
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with: # 3.7 is used for Vista support. See https://github.com/yt-dlp/yt-dlp/issues/390
python-version: "3.7"
with:
python-version: "3.8"
architecture: "x86"
- name: Install Requirements
run: |
@ -359,7 +387,7 @@ jobs:
- name: Prepare
run: |
python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }}
python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python devscripts/make_lazy_extractors.py
- name: Build
run: |
@ -387,6 +415,7 @@ jobs:
meta_files:
if: inputs.meta_files && always() && !cancelled()
needs:
- process
- unix
- linux_arm
- macos
@ -407,7 +436,16 @@ jobs:
run: |
cat >> _update_spec << EOF
# This file is used for regulating self-update
lock 2022.08.18.36 .+ Python 3.6
lock 2022.08.18.36 .+ Python 3\.6
lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
EOF
- name: Sign checksum files

View file

@ -29,7 +29,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL

View file

@ -1,8 +1,32 @@
name: Core Tests
on: [push, pull_request]
on:
push:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
pull_request:
paths:
- .github/**
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
permissions:
contents: read
concurrency:
group: core-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
tests:
name: Core Tests
@ -12,30 +36,26 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
# CPython 3.11 is in quick-test
python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
run-tests-ext: [sh]
# CPython 3.8 is in quick-test
python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
include:
# atleast one of each CPython/PyPy tests must be in windows
- os: windows-latest
python-version: '3.7'
run-tests-ext: bat
python-version: '3.8'
- os: windows-latest
python-version: '3.12'
run-tests-ext: bat
- os: windows-latest
python-version: pypy-3.9
run-tests-ext: bat
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
- name: Install test requirements
run: pip install pytest -r requirements.txt
- name: Run tests
continue-on-error: False
run: |
python3 -m yt_dlp -v || true # Print debug head
./devscripts/run_tests.${{ matrix.run-tests-ext }} core
python3 ./devscripts/run_tests.py core

View file

@ -9,16 +9,16 @@ jobs:
if: "contains(github.event.head_commit.message, 'ci run dl')"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install test requirements
run: pip install pytest
run: pip install pytest -r requirements.txt
- name: Run tests
continue-on-error: true
run: ./devscripts/run_tests.sh download
run: python3 ./devscripts/run_tests.py download
full:
name: Full Download Tests
@ -28,24 +28,21 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest]
python-version: ['3.7', '3.10', '3.12', pypy-3.7, pypy-3.8, pypy-3.10]
run-tests-ext: [sh]
python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10]
include:
# atleast one of each CPython/PyPy tests must be in windows
- os: windows-latest
python-version: '3.8'
run-tests-ext: bat
- os: windows-latest
python-version: pypy-3.9
run-tests-ext: bat
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install pytest
run: pip install pytest
- name: Install test requirements
run: pip install pytest -r requirements.txt
- name: Run tests
continue-on-error: true
run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} download
run: python3 ./devscripts/run_tests.py download

View file

@ -1,97 +0,0 @@
name: Publish
on:
workflow_call:
inputs:
channel:
default: stable
required: true
type: string
version:
required: true
type: string
target_commitish:
required: true
type: string
prerelease:
default: false
required: true
type: boolean
secrets:
ARCHIVE_REPO_TOKEN:
required: false
permissions:
contents: write
jobs:
publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: actions/download-artifact@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Generate release notes
run: |
printf '%s' \
'[![Installation](https://img.shields.io/badge/-Which%20file%20should%20I%20download%3F-white.svg?style=for-the-badge)]' \
'(https://github.com/yt-dlp/yt-dlp#installation "Installation instructions") ' \
'[![Documentation](https://img.shields.io/badge/-Docs-brightgreen.svg?style=for-the-badge&logo=GitBook&labelColor=555555)]' \
'(https://github.com/yt-dlp/yt-dlp/tree/2023.03.04#readme "Documentation") ' \
'[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)]' \
'(https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators "Donate") ' \
'[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)]' \
'(https://discord.gg/H5MNcFW63r "Discord") ' \
${{ inputs.channel != 'nightly' && '"[![Nightly](https://img.shields.io/badge/Get%20nightly%20builds-purple.svg?style=for-the-badge)]" \
"(https://github.com/yt-dlp/yt-dlp-nightly-builds/releases/latest \"Nightly builds\")"' || '' }} \
> ./RELEASE_NOTES
printf '\n\n' >> ./RELEASE_NOTES
cat >> ./RELEASE_NOTES << EOF
#### A description of the various files are in the [README](https://github.com/yt-dlp/yt-dlp#release-files)
---
$(python ./devscripts/make_changelog.py -vv --collapsible)
EOF
printf '%s\n\n' '**This is an automated nightly pre-release build**' >> ./NIGHTLY_NOTES
cat ./RELEASE_NOTES >> ./NIGHTLY_NOTES
printf '%s\n\n' 'Generated from: https://github.com/${{ github.repository }}/commit/${{ inputs.target_commitish }}' >> ./ARCHIVE_NOTES
cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES
- name: Archive nightly release
env:
GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
GH_REPO: ${{ vars.ARCHIVE_REPO }}
if: |
inputs.channel == 'nightly' && env.GH_TOKEN != '' && env.GH_REPO != ''
run: |
gh release create \
--notes-file ARCHIVE_NOTES \
--title "yt-dlp nightly ${{ inputs.version }}" \
${{ inputs.version }} \
artifact/*
- name: Prune old nightly release
if: inputs.channel == 'nightly' && !vars.ARCHIVE_REPO
env:
GH_TOKEN: ${{ github.token }}
run: |
gh release delete --yes --cleanup-tag "nightly" || true
git tag --delete "nightly" || true
sleep 5 # Enough time to cover deletion race condition
- name: Publish release${{ inputs.channel == 'nightly' && ' (nightly)' || '' }}
env:
GH_TOKEN: ${{ github.token }}
if: (inputs.channel == 'nightly' && !vars.ARCHIVE_REPO) || inputs.channel != 'nightly'
run: |
gh release create \
--notes-file ${{ inputs.channel == 'nightly' && 'NIGHTLY_NOTES' || 'RELEASE_NOTES' }} \
--target ${{ inputs.target_commitish }} \
--title "yt-dlp ${{ inputs.channel == 'nightly' && 'nightly ' || '' }}${{ inputs.version }}" \
${{ inputs.prerelease && '--prerelease' || '' }} \
${{ inputs.channel == 'nightly' && '"nightly"' || inputs.version }} \
artifact/*

View file

@ -9,23 +9,23 @@ jobs:
if: "!contains(github.event.head_commit.message, 'ci skip all')"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.11
- uses: actions/checkout@v4
- name: Set up Python 3.8
uses: actions/setup-python@v4
with:
python-version: '3.11'
python-version: '3.8'
- name: Install test requirements
run: pip install pytest pycryptodomex
run: pip install pytest -r requirements.txt
- name: Run tests
run: |
python3 -m yt_dlp -v || true
./devscripts/run_tests.sh core
python3 ./devscripts/run_tests.py core
flake8:
name: Linter
if: "!contains(github.event.head_commit.message, 'ci skip all')"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
- name: Install flake8
run: pip install flake8

27
.github/workflows/release-master.yml vendored Normal file
View file

@ -0,0 +1,27 @@
name: Release (master)
on:
push:
branches:
- master
paths:
- "yt_dlp/**.py"
- "!yt_dlp/version.py"
- "setup.py"
- "pyinst.py"
concurrency:
group: release-master
permissions:
contents: read
jobs:
release:
if: vars.BUILD_MASTER != ''
uses: ./.github/workflows/release.yml
with:
prerelease: true
source: master
permissions:
contents: write
packages: write
id-token: write # mandatory for trusted publishing
secrets: inherit

View file

@ -1,52 +1,35 @@
name: Release (nightly)
on:
push:
branches:
- master
paths:
- "yt_dlp/**.py"
- "!yt_dlp/version.py"
concurrency:
group: release-nightly
cancel-in-progress: true
schedule:
- cron: '23 23 * * *'
permissions:
contents: read
jobs:
prepare:
check_nightly:
if: vars.BUILD_NIGHTLY != ''
runs-on: ubuntu-latest
outputs:
version: ${{ steps.get_version.outputs.version }}
commit: ${{ steps.check_for_new_commits.outputs.commit }}
steps:
- uses: actions/checkout@v3
- name: Get version
id: get_version
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Check for new commits
id: check_for_new_commits
run: |
python devscripts/update-version.py "$(date -u +"%H%M%S")" | grep -Po "version=\d+(\.\d+){3}" >> "$GITHUB_OUTPUT"
relevant_files=("yt_dlp/*.py" ':!yt_dlp/version.py' "setup.py" "pyinst.py")
echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT"
build:
needs: prepare
uses: ./.github/workflows/build.yml
release:
needs: [check_nightly]
if: ${{ needs.check_nightly.outputs.commit }}
uses: ./.github/workflows/release.yml
with:
version: ${{ needs.prepare.outputs.version }}
channel: nightly
permissions:
contents: read
packages: write # For package cache
secrets:
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
publish:
needs: [prepare, build]
uses: ./.github/workflows/publish.yml
secrets:
ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }}
prerelease: true
source: nightly
permissions:
contents: write
with:
channel: nightly
prerelease: true
version: ${{ needs.prepare.outputs.version }}
target_commitish: ${{ github.sha }}
packages: write
id-token: write # mandatory for trusted publishing
secrets: inherit

View file

@ -1,14 +1,45 @@
name: Release
on:
workflow_dispatch:
workflow_call:
inputs:
version:
description: Version tag (YYYY.MM.DD[.REV])
prerelease:
required: false
default: true
type: boolean
source:
required: false
default: ''
type: string
channel:
description: Update channel (stable/nightly/...)
target:
required: false
default: ''
type: string
version:
required: false
default: ''
type: string
workflow_dispatch:
inputs:
source:
description: |
SOURCE of this release's updates:
channel, repo, tag, or channel/repo@tag
(default: <current_repo>)
required: false
default: ''
type: string
target:
description: |
TARGET to publish this release to:
channel, tag, or channel@tag
(default: <source> if writable else <current_repo>[@source_tag])
required: false
default: ''
type: string
version:
description: |
VERSION: yyyy.mm.dd[.rev] or rev
(default: auto-generated)
required: false
default: ''
type: string
@ -26,12 +57,17 @@ jobs:
contents: write
runs-on: ubuntu-latest
outputs:
channel: ${{ steps.set_channel.outputs.channel }}
version: ${{ steps.update_version.outputs.version }}
channel: ${{ steps.setup_variables.outputs.channel }}
version: ${{ steps.setup_variables.outputs.version }}
target_repo: ${{ steps.setup_variables.outputs.target_repo }}
target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }}
target_tag: ${{ steps.setup_variables.outputs.target_tag }}
pypi_project: ${{ steps.setup_variables.outputs.pypi_project }}
pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }}
head_sha: ${{ steps.get_target.outputs.head_sha }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
fetch-depth: 0
@ -39,25 +75,123 @@ jobs:
with:
python-version: "3.10"
- name: Set channel
id: set_channel
- name: Process inputs
id: process_inputs
run: |
CHANNEL="${{ github.repository == 'yt-dlp/yt-dlp' && 'stable' || github.repository }}"
echo "channel=${{ inputs.channel || '$CHANNEL' }}" > "$GITHUB_OUTPUT"
cat << EOF
::group::Inputs
prerelease=${{ inputs.prerelease }}
source=${{ inputs.source }}
target=${{ inputs.target }}
version=${{ inputs.version }}
::endgroup::
EOF
IFS='@' read -r source_repo source_tag <<<"${{ inputs.source }}"
IFS='@' read -r target_repo target_tag <<<"${{ inputs.target }}"
cat << EOF >> "$GITHUB_OUTPUT"
source_repo=${source_repo}
source_tag=${source_tag}
target_repo=${target_repo}
target_tag=${target_tag}
EOF
- name: Update version
id: update_version
- name: Setup variables
id: setup_variables
env:
source_repo: ${{ steps.process_inputs.outputs.source_repo }}
source_tag: ${{ steps.process_inputs.outputs.source_tag }}
target_repo: ${{ steps.process_inputs.outputs.target_repo }}
target_tag: ${{ steps.process_inputs.outputs.target_tag }}
run: |
REVISION="${{ vars.PUSH_VERSION_COMMIT == '' && '$(date -u +"%H%M%S")' || '' }}"
REVISION="${{ inputs.prerelease && '$(date -u +"%H%M%S")' || '$REVISION' }}"
python devscripts/update-version.py ${{ inputs.version || '$REVISION' }} | \
grep -Po "version=\d+\.\d+\.\d+(\.\d+)?" >> "$GITHUB_OUTPUT"
# unholy bash monstrosity (sincere apologies)
fallback_token () {
if ${{ !secrets.ARCHIVE_REPO_TOKEN }}; then
echo "::error::Repository access secret ${target_repo_token^^} not found"
exit 1
fi
target_repo_token=ARCHIVE_REPO_TOKEN
return 0
}
source_is_channel=0
[[ "${source_repo}" == 'stable' ]] && source_repo='yt-dlp/yt-dlp'
if [[ -z "${source_repo}" ]]; then
source_repo='${{ github.repository }}'
elif [[ '${{ vars[format('{0}_archive_repo', env.source_repo)] }}' ]]; then
source_is_channel=1
source_channel='${{ vars[format('{0}_archive_repo', env.source_repo)] }}'
elif [[ -z "${source_tag}" && "${source_repo}" != */* ]]; then
source_tag="${source_repo}"
source_repo='${{ github.repository }}'
fi
resolved_source="${source_repo}"
if [[ "${source_tag}" ]]; then
resolved_source="${resolved_source}@${source_tag}"
elif [[ "${source_repo}" == 'yt-dlp/yt-dlp' ]]; then
resolved_source='stable'
fi
revision="${{ (inputs.prerelease || !vars.PUSH_VERSION_COMMIT) && '$(date -u +"%H%M%S")' || '' }}"
version="$(
python devscripts/update-version.py \
-c "${resolved_source}" -r "${{ github.repository }}" ${{ inputs.version || '$revision' }} | \
grep -Po "version=\K\d+\.\d+\.\d+(\.\d+)?")"
if [[ "${target_repo}" ]]; then
if [[ -z "${target_tag}" ]]; then
if [[ '${{ vars[format('{0}_archive_repo', env.target_repo)] }}' ]]; then
target_tag="${source_tag:-${version}}"
else
target_tag="${target_repo}"
target_repo='${{ github.repository }}'
fi
fi
if [[ "${target_repo}" != '${{ github.repository}}' ]]; then
target_repo='${{ vars[format('{0}_archive_repo', env.target_repo)] }}'
target_repo_token='${{ env.target_repo }}_archive_repo_token'
${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}'
fi
else
target_tag="${source_tag:-${version}}"
if ((source_is_channel)); then
target_repo="${source_channel}"
target_repo_token='${{ env.source_repo }}_archive_repo_token'
${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token
pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}'
pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}'
else
target_repo='${{ github.repository }}'
fi
fi
if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then
pypi_project='${{ vars.PYPI_PROJECT }}'
fi
echo "::group::Output variables"
cat << EOF | tee -a "$GITHUB_OUTPUT"
channel=${resolved_source}
version=${version}
target_repo=${target_repo}
target_repo_token=${target_repo_token}
target_tag=${target_tag}
pypi_project=${pypi_project}
pypi_suffix=${pypi_suffix}
EOF
echo "::endgroup::"
- name: Update documentation
env:
version: ${{ steps.setup_variables.outputs.version }}
target_repo: ${{ steps.setup_variables.outputs.target_repo }}
if: |
!inputs.prerelease && env.target_repo == github.repository
run: |
make doc
sed '/### /Q' Changelog.md >> ./CHANGELOG
echo '### ${{ steps.update_version.outputs.version }}' >> ./CHANGELOG
echo '### ${{ env.version }}' >> ./CHANGELOG
python ./devscripts/make_changelog.py -vv -c >> ./CHANGELOG
echo >> ./CHANGELOG
grep -Poz '(?s)### \d+\.\d+\.\d+.+' 'Changelog.md' | head -n -1 >> ./CHANGELOG
@ -65,12 +199,16 @@ jobs:
- name: Push to release
id: push_release
if: ${{ !inputs.prerelease }}
env:
version: ${{ steps.setup_variables.outputs.version }}
target_repo: ${{ steps.setup_variables.outputs.target_repo }}
if: |
!inputs.prerelease && env.target_repo == github.repository
run: |
git config --global user.name github-actions
git config --global user.email github-actions@example.com
git config --global user.name "github-actions[bot]"
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -u
git commit -m "Release ${{ steps.update_version.outputs.version }}" \
git commit -m "Release ${{ env.version }}" \
-m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl"
git push origin --force ${{ github.event.ref }}:release
@ -80,7 +218,10 @@ jobs:
echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
- name: Update master
if: vars.PUSH_VERSION_COMMIT != '' && !inputs.prerelease
env:
target_repo: ${{ steps.setup_variables.outputs.target_repo }}
if: |
vars.PUSH_VERSION_COMMIT != '' && !inputs.prerelease && env.target_repo == github.repository
run: git push origin ${{ github.event.ref }}
build:
@ -89,75 +230,148 @@ jobs:
with:
version: ${{ needs.prepare.outputs.version }}
channel: ${{ needs.prepare.outputs.channel }}
origin: ${{ needs.prepare.outputs.target_repo }}
permissions:
contents: read
packages: write # For package cache
secrets:
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
publish_pypi_homebrew:
publish_pypi:
needs: [prepare, build]
if: ${{ needs.prepare.outputs.pypi_project }}
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install Requirements
run: |
sudo apt-get -y install pandoc man
sudo apt -y install pandoc man
python -m pip install -U pip setuptools wheel twine
python -m pip install -U -r requirements.txt
- name: Prepare
run: |
python devscripts/update-version.py ${{ needs.prepare.outputs.version }}
python devscripts/make_lazy_extractors.py
- name: Build and publish on PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
if: env.TWINE_PASSWORD != '' && !inputs.prerelease
version: ${{ needs.prepare.outputs.version }}
suffix: ${{ needs.prepare.outputs.pypi_suffix }}
channel: ${{ needs.prepare.outputs.channel }}
target_repo: ${{ needs.prepare.outputs.target_repo }}
pypi_project: ${{ needs.prepare.outputs.pypi_project }}
run: |
python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}"
python devscripts/make_lazy_extractors.py
sed -i -E "s/(name=')[^']+(', # package name)/\1${{ env.pypi_project }}\2/" setup.py
- name: Build
run: |
rm -rf dist/*
make pypi-files
python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update"
python setup.py sdist bdist_wheel
twine upload dist/*
- name: Checkout Homebrew repository
env:
BREW_TOKEN: ${{ secrets.BREW_TOKEN }}
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' && !inputs.prerelease
uses: actions/checkout@v3
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
repository: yt-dlp/homebrew-taps
path: taps
ssh-key: ${{ secrets.BREW_TOKEN }}
- name: Update Homebrew Formulae
env:
BREW_TOKEN: ${{ secrets.BREW_TOKEN }}
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' && !inputs.prerelease
run: |
python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.version }}"
git -C taps/ config user.name github-actions
git -C taps/ config user.email github-actions@example.com
git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.version }}'
git -C taps/ push
verbose: true
publish:
needs: [prepare, build]
uses: ./.github/workflows/publish.yml
permissions:
contents: write
with:
channel: ${{ needs.prepare.outputs.channel }}
prerelease: ${{ inputs.prerelease }}
version: ${{ needs.prepare.outputs.version }}
target_commitish: ${{ needs.prepare.outputs.head_sha }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/download-artifact@v3
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Generate release notes
env:
head_sha: ${{ needs.prepare.outputs.head_sha }}
target_repo: ${{ needs.prepare.outputs.target_repo }}
target_tag: ${{ needs.prepare.outputs.target_tag }}
run: |
printf '%s' \
'[![Installation](https://img.shields.io/badge/-Which%20file%20should%20I%20download%3F-white.svg?style=for-the-badge)]' \
'(https://github.com/${{ github.repository }}#installation "Installation instructions") ' \
'[![Documentation](https://img.shields.io/badge/-Docs-brightgreen.svg?style=for-the-badge&logo=GitBook&labelColor=555555)]' \
'(https://github.com/${{ github.repository }}' \
'${{ env.target_repo == github.repository && format('/tree/{0}', env.target_tag) || '' }}#readme "Documentation") ' \
'[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)]' \
'(https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators "Donate") ' \
'[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)]' \
'(https://discord.gg/H5MNcFW63r "Discord") ' \
${{ env.target_repo == 'yt-dlp/yt-dlp' && '\
"[![Nightly](https://img.shields.io/badge/Get%20nightly%20builds-purple.svg?style=for-the-badge)]" \
"(https://github.com/yt-dlp/yt-dlp-nightly-builds/releases/latest \"Nightly builds\") " \
"[![Master](https://img.shields.io/badge/Get%20master%20builds-lightblue.svg?style=for-the-badge)]" \
"(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES
printf '\n\n' >> ./RELEASE_NOTES
cat >> ./RELEASE_NOTES << EOF
#### A description of the various files are in the [README](https://github.com/${{ github.repository }}#release-files)
---
$(python ./devscripts/make_changelog.py -vv --collapsible)
EOF
printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES
cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES
printf '%s\n\n' 'Generated from: https://github.com/${{ github.repository }}/commit/${{ env.head_sha }}' >> ./ARCHIVE_NOTES
cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES
- name: Publish to archive repo
env:
GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }}
GH_REPO: ${{ needs.prepare.outputs.target_repo }}
version: ${{ needs.prepare.outputs.version }}
channel: ${{ needs.prepare.outputs.channel }}
if: |
inputs.prerelease && env.GH_TOKEN != '' && env.GH_REPO != '' && env.GH_REPO != github.repository
run: |
title="${{ startswith(env.GH_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }}${{ env.channel }}"
gh release create \
--notes-file ARCHIVE_NOTES \
--title "${title} ${{ env.version }}" \
${{ env.version }} \
artifact/*
- name: Prune old release
env:
GH_TOKEN: ${{ github.token }}
version: ${{ needs.prepare.outputs.version }}
target_repo: ${{ needs.prepare.outputs.target_repo }}
target_tag: ${{ needs.prepare.outputs.target_tag }}
if: |
env.target_repo == github.repository && env.target_tag != env.version
run: |
gh release delete --yes --cleanup-tag "${{ env.target_tag }}" || true
git tag --delete "${{ env.target_tag }}" || true
sleep 5 # Enough time to cover deletion race condition
- name: Publish release
env:
GH_TOKEN: ${{ github.token }}
version: ${{ needs.prepare.outputs.version }}
target_repo: ${{ needs.prepare.outputs.target_repo }}
target_tag: ${{ needs.prepare.outputs.target_tag }}
head_sha: ${{ needs.prepare.outputs.head_sha }}
if: |
env.target_repo == github.repository
run: |
title="${{ github.repository == 'yt-dlp/yt-dlp' && 'yt-dlp ' || '' }}"
title+="${{ env.target_tag != env.version && format('{0} ', env.target_tag) || '' }}"
gh release create \
--notes-file ${{ inputs.prerelease && 'PRERELEASE_NOTES' || 'RELEASE_NOTES' }} \
--target ${{ env.head_sha }} \
--title "${title}${{ env.version }}" \
${{ inputs.prerelease && '--prerelease' || '' }} \
${{ env.target_tag }} \
artifact/*

View file

@ -140,12 +140,9 @@ # DEVELOPER INSTRUCTIONS
python -m yt_dlp
To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
To run all the available core tests, use:
python -m unittest discover
python test/test_download.py
nosetests
pytest
python devscripts/run_tests.py
See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.
@ -187,15 +184,21 @@ ## Adding support for a new site
'url': 'https://yourextractor.com/watch/42',
'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
'info_dict': {
# For videos, only the 'id' and 'ext' fields are required to RUN the test:
'id': '42',
'ext': 'mp4',
'title': 'Video title goes here',
'thumbnail': r're:^https?://.*\.jpg$',
# TODO more properties, either as:
# * A value
# * MD5 checksum; start the string with md5:
# * A regular expression; start the string with re:
# * Any Python type, e.g. int or float
# Then if the test run fails, it will output the missing/incorrect fields.
# Properties can be added as:
# * A value, e.g.
# 'title': 'Video title goes here',
# * MD5 checksum; start the string with 'md5:', e.g.
# 'description': 'md5:098f6bcd4621d373cade4e832627b4f6',
# * A regular expression; start the string with 're:', e.g.
# 'thumbnail': r're:^https?://.*\.jpg$',
# * A count of elements in a list; start the string with 'count:', e.g.
# 'tags': 'count:10',
# * Any Python type, e.g.
# 'view_count': int,
}
}]
@ -215,14 +218,14 @@ ## Adding support for a new site
}
```
1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`.
1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`
1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Run `python devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`
1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 yt_dlp/extractor/yourextractor.py
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.7 and above. Backward compatibility is not required for even older versions of Python.
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add yt_dlp/extractor/_extractors.py

View file

@ -513,3 +513,32 @@ awalgarg
midnightveil
naginatana
Riteo
1100101
aniolpages
bartbroere
CrendKing
Esokrates
HitomaruKonpaku
LoserFox
peci1
saintliao
shubhexists
SirElderling
almx
elivinsky
starius
TravisDupes
amir16yp
Fymyte
Ganesh910
hashFactory
kclauhk
Kyraminol
lstrojny
middlingphys
NickCis
nicodato
prettykool
S-Aarab
sonmezberkay
TSRBerry

View file

@ -4,6 +4,178 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
### 2023.12.30
#### Core changes
- [Fix format selection parse error for CPython 3.12](https://github.com/yt-dlp/yt-dlp/commit/00cdda4f6fe18712ced13dbc64b7ea10f323e268) ([#8797](https://github.com/yt-dlp/yt-dlp/issues/8797)) by [Grub4K](https://github.com/Grub4K)
- [Let `read_stdin` obey `--quiet`](https://github.com/yt-dlp/yt-dlp/commit/a174c453ee1e853c584ceadeac17eef2bd433dc5) by [pukkandan](https://github.com/pukkandan)
- [Merged with youtube-dl be008e6](https://github.com/yt-dlp/yt-dlp/commit/65de7d204ce88c0225df1321060304baab85dbd8) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K)
- [Parse `release_year` from `release_date`](https://github.com/yt-dlp/yt-dlp/commit/1732eccc0a40256e076bf0435a29f0f1d8419280) ([#8524](https://github.com/yt-dlp/yt-dlp/issues/8524)) by [seproDev](https://github.com/seproDev)
- [Release workflow and Updater cleanup](https://github.com/yt-dlp/yt-dlp/commit/632b8ee54eb2df8ac6e20746a0bd95b7ebb053aa) ([#8640](https://github.com/yt-dlp/yt-dlp/issues/8640)) by [bashonly](https://github.com/bashonly)
- [Remove Python 3.7 support](https://github.com/yt-dlp/yt-dlp/commit/f4b95acafcd69a50040730dfdf732e797278fdcc) ([#8361](https://github.com/yt-dlp/yt-dlp/issues/8361)) by [bashonly](https://github.com/bashonly)
- [Support `NO_COLOR` environment variable](https://github.com/yt-dlp/yt-dlp/commit/a0b19d319a6ce8b7059318fa17a34b144fde1785) ([#8385](https://github.com/yt-dlp/yt-dlp/issues/8385)) by [Grub4K](https://github.com/Grub4K), [prettykool](https://github.com/prettykool)
- **outtmpl**: [Support multiplication](https://github.com/yt-dlp/yt-dlp/commit/993edd3f6e17e966c763bc86dc34125445cec6b6) by [pukkandan](https://github.com/pukkandan)
- **utils**: `traverse_obj`: [Move `is_user_input` into output template](https://github.com/yt-dlp/yt-dlp/commit/0b6f829b1dfda15d3c1d7d1fbe4ea6102c26dd24) ([#8673](https://github.com/yt-dlp/yt-dlp/issues/8673)) by [Grub4K](https://github.com/Grub4K)
- **webvtt**: [Allow spaces before newlines for CueBlock](https://github.com/yt-dlp/yt-dlp/commit/15f22b4880b6b3f71f350c64d70976ae65b9f1ca) ([#7681](https://github.com/yt-dlp/yt-dlp/issues/7681)) by [TSRBerry](https://github.com/TSRBerry) (With fixes in [298230e](https://github.com/yt-dlp/yt-dlp/commit/298230e550886b746c266724dd701d842ca2696e) by [pukkandan](https://github.com/pukkandan))
#### Extractor changes
- [Add `media_type` field](https://github.com/yt-dlp/yt-dlp/commit/e370f9ec36972d06100a3db893b397bfc1b07b4d) by [trainman261](https://github.com/trainman261)
- [Extract from `media` elements in SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/ddb2d7588bea48bae965dbfabe6df6550c9d3d43) ([#8504](https://github.com/yt-dlp/yt-dlp/issues/8504)) by [seproDev](https://github.com/seproDev)
- **abematv**: [Fix season metadata](https://github.com/yt-dlp/yt-dlp/commit/cc07f5cc85d9e2a6cd0bedb9d961665eea0d6047) ([#8607](https://github.com/yt-dlp/yt-dlp/issues/8607)) by [middlingphys](https://github.com/middlingphys)
- **allstar**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/3237f8ba29fe13bf95ff42b1e48b5b5109715feb) ([#8274](https://github.com/yt-dlp/yt-dlp/issues/8274)) by [S-Aarab](https://github.com/S-Aarab)
- **altcensored**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3f90813f0617e0d21302398010de7496c9ae36aa) ([#8291](https://github.com/yt-dlp/yt-dlp/issues/8291)) by [drzraf](https://github.com/drzraf)
- **ard**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/5f009a094f0e8450792b097c4c8273622778052d) ([#8878](https://github.com/yt-dlp/yt-dlp/issues/8878)) by [seproDev](https://github.com/seproDev)
- **ardbetamediathek**: [Fix series extraction](https://github.com/yt-dlp/yt-dlp/commit/1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5) ([#8687](https://github.com/yt-dlp/yt-dlp/issues/8687)) by [lstrojny](https://github.com/lstrojny)
- **bbc**
- [Extract more formats](https://github.com/yt-dlp/yt-dlp/commit/c919b68f7e79ea5010f75f648d3c9e45405a8011) ([#8321](https://github.com/yt-dlp/yt-dlp/issues/8321)) by [barsnick](https://github.com/barsnick), [dirkf](https://github.com/dirkf)
- [Fix JSON parsing bug](https://github.com/yt-dlp/yt-dlp/commit/19741ab8a401ec64d5e84fdbfcfb141d105e7bc8) by [bashonly](https://github.com/bashonly)
- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4903f452b68efb62dadf22e81be8c7934fc743e7) ([#8651](https://github.com/yt-dlp/yt-dlp/issues/8651)) by [bashonly](https://github.com/bashonly)
- **bilibili**: [Support courses and interactive videos](https://github.com/yt-dlp/yt-dlp/commit/9f09bdcfcb8e2b4b2decdc30d35d34b993bc7a94) ([#8343](https://github.com/yt-dlp/yt-dlp/issues/8343)) by [c-basalt](https://github.com/c-basalt)
- **bitchute**: [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/b1a1ec1540605d2ea7abdb63336ffb1c56bf6316) ([#8507](https://github.com/yt-dlp/yt-dlp/issues/8507)) by [SirElderling](https://github.com/SirElderling)
- **box**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/5a230233d6fce06f4abd1fce0dc92b948e6f780b) ([#8649](https://github.com/yt-dlp/yt-dlp/issues/8649)) by [bashonly](https://github.com/bashonly)
- **bundestag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00a3e47bf5440c96025a76e08337ff2a475ed83e) ([#8783](https://github.com/yt-dlp/yt-dlp/issues/8783)) by [Grub4K](https://github.com/Grub4K)
- **drtv**: [Set default ext for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f96ab86cd837b1b5823baa87d144e15322ee9298) ([#8590](https://github.com/yt-dlp/yt-dlp/issues/8590)) by [seproDev](https://github.com/seproDev)
- **duoplay**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/66a0127d45033c698bdbedf162cddc55d9e7b906) ([#8542](https://github.com/yt-dlp/yt-dlp/issues/8542)) by [glensc](https://github.com/glensc)
- **eplus**: [Add login support and DRM detection](https://github.com/yt-dlp/yt-dlp/commit/d5d1517e7d838500800d193ac3234b06e89654cd) ([#8661](https://github.com/yt-dlp/yt-dlp/issues/8661)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **facebook**
- [Fix Memories extraction](https://github.com/yt-dlp/yt-dlp/commit/c39358a54bc6675ae0c50b81024e5a086e41656a) ([#8681](https://github.com/yt-dlp/yt-dlp/issues/8681)) by [kclauhk](https://github.com/kclauhk)
- [Improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/9cafb9ff17e14475a35c9a58b5bb010c86c9db4b) ([#8296](https://github.com/yt-dlp/yt-dlp/issues/8296)) by [kclauhk](https://github.com/kclauhk)
- **floatplane**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/628fa244bbce2ad39775a5959e99588f30cac152) ([#8639](https://github.com/yt-dlp/yt-dlp/issues/8639)) by [seproDev](https://github.com/seproDev)
- **francetv**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/71f28097fec1c9e029f74b68a4eadc8915399840) ([#8409](https://github.com/yt-dlp/yt-dlp/issues/8409)) by [Fymyte](https://github.com/Fymyte)
- **instagram**: [Fix stories extraction](https://github.com/yt-dlp/yt-dlp/commit/50eaea9fd7787546b53660e736325fa31c77765d) ([#8843](https://github.com/yt-dlp/yt-dlp/issues/8843)) by [bashonly](https://github.com/bashonly)
- **joqrag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/db8b4edc7d0bd27da462f6fe82ff6e13e3d68a04) ([#8384](https://github.com/yt-dlp/yt-dlp/issues/8384)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **litv**: [Fix premium content extraction](https://github.com/yt-dlp/yt-dlp/commit/f45c4efcd928a173e1300a8f1ce4258e70c969b1) ([#8842](https://github.com/yt-dlp/yt-dlp/issues/8842)) by [bashonly](https://github.com/bashonly)
- **maariv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c5f01bf7d4b9426c87c3f8248de23934a56579e0) ([#8331](https://github.com/yt-dlp/yt-dlp/issues/8331)) by [amir16yp](https://github.com/amir16yp)
- **mediastream**: [Fix authenticated format extraction](https://github.com/yt-dlp/yt-dlp/commit/b03c89309eb141be1a1eceeeb7475dd3b7529ad9) ([#8657](https://github.com/yt-dlp/yt-dlp/issues/8657)) by [NickCis](https://github.com/NickCis)
- **nebula**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/45d82be65f71bb05506bd55376c6fdb36bc54142) ([#8566](https://github.com/yt-dlp/yt-dlp/issues/8566)) by [elyse0](https://github.com/elyse0), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
- **nintendo**: [Fix Nintendo Direct extraction](https://github.com/yt-dlp/yt-dlp/commit/1d24da6c899ef280d8b0a48a5e280ecd5d39cdf4) ([#8609](https://github.com/yt-dlp/yt-dlp/issues/8609)) by [Grub4K](https://github.com/Grub4K)
- **ondemandkorea**: [Fix upgraded format extraction](https://github.com/yt-dlp/yt-dlp/commit/04a5e06350e3ef7c03f94f2f3f90dd96c6411152) ([#8677](https://github.com/yt-dlp/yt-dlp/issues/8677)) by [seproDev](https://github.com/seproDev)
- **pr0gramm**: [Support variant formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/f98a3305eb124a0c375d03209d5c5a64fe1766c8) ([#8674](https://github.com/yt-dlp/yt-dlp/issues/8674)) by [Grub4K](https://github.com/Grub4K)
- **rinsefm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c91af948e43570025e4aa887e248fd025abae394) ([#8778](https://github.com/yt-dlp/yt-dlp/issues/8778)) by [hashFactory](https://github.com/hashFactory)
- **rudovideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0d531c35eca4c2eb36e160530a7a333edbc727cc) ([#8664](https://github.com/yt-dlp/yt-dlp/issues/8664)) by [nicodato](https://github.com/nicodato)
- **theguardian**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1fa3f24d4b5d22176b11d78420f1f4b64a5af0a8) ([#8535](https://github.com/yt-dlp/yt-dlp/issues/8535)) by [SirElderling](https://github.com/SirElderling)
- **theplatform**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/7e09c147fdccb44806bbf601573adc4b77210a89) ([#8635](https://github.com/yt-dlp/yt-dlp/issues/8635)) by [trainman261](https://github.com/trainman261)
- **twitcasting**: [Detect livestreams via API and `show` page](https://github.com/yt-dlp/yt-dlp/commit/585d0ed9abcfcb957f2b2684b8ad43c3af160383) ([#8601](https://github.com/yt-dlp/yt-dlp/issues/8601)) by [bashonly](https://github.com/bashonly), [JC-Chung](https://github.com/JC-Chung)
- **twitcastinguser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ff2fde1b8f922fd34bae6172602008cd67c07c93) ([#8650](https://github.com/yt-dlp/yt-dlp/issues/8650)) by [bashonly](https://github.com/bashonly)
- **twitter**
- [Extract stale tweets](https://github.com/yt-dlp/yt-dlp/commit/1c54a98e19d047e7c15184237b6ef8ad50af489c) ([#8724](https://github.com/yt-dlp/yt-dlp/issues/8724)) by [bashonly](https://github.com/bashonly)
- [Prioritize m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/e7d22348e77367740da78a3db27167ecf894b7c9) ([#8826](https://github.com/yt-dlp/yt-dlp/issues/8826)) by [bashonly](https://github.com/bashonly)
- [Work around API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/116c268438ea4d3738f6fa502c169081ca8f0ee7) ([#8825](https://github.com/yt-dlp/yt-dlp/issues/8825)) by [bashonly](https://github.com/bashonly)
- broadcast: [Extract `concurrent_view_count`](https://github.com/yt-dlp/yt-dlp/commit/6fe82491ed622b948c512cf4aab46ac3a234ae0a) ([#8600](https://github.com/yt-dlp/yt-dlp/issues/8600)) by [sonmezberkay](https://github.com/sonmezberkay)
- **vidly**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/34df1c1f60fa652c0a6a5c712b06c10e45daf6b7) ([#8612](https://github.com/yt-dlp/yt-dlp/issues/8612)) by [seproDev](https://github.com/seproDev)
- **vocaroo**: [Do not use deprecated `getheader`](https://github.com/yt-dlp/yt-dlp/commit/f223b1b0789f65e06619dcc9fc9e74f50d259379) ([#8606](https://github.com/yt-dlp/yt-dlp/issues/8606)) by [qbnu](https://github.com/qbnu)
- **vvvvid**: [Set user-agent to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1725e943b0e8a8b585305660d4611e684374409c) ([#8615](https://github.com/yt-dlp/yt-dlp/issues/8615)) by [Kyraminol](https://github.com/Kyraminol)
- **youtube**
- [Fix `like_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/6b5d93b0b0240e287389d1d43b2d5293e18aa4cc) ([#8763](https://github.com/yt-dlp/yt-dlp/issues/8763)) by [Ganesh910](https://github.com/Ganesh910)
- [Improve detection of faulty HLS formats](https://github.com/yt-dlp/yt-dlp/commit/bb5a54e6db2422bbd155d93a0e105b6616c09467) ([#8646](https://github.com/yt-dlp/yt-dlp/issues/8646)) by [bashonly](https://github.com/bashonly)
- [Return empty playlist when channel/tab has no videos](https://github.com/yt-dlp/yt-dlp/commit/044886c220620a7679109e92352890e18b6079e3) by [pukkandan](https://github.com/pukkandan)
- [Support cf.piped.video](https://github.com/yt-dlp/yt-dlp/commit/6a9c7a2b52655bacfa7ab2da24fd0d14a6fff495) ([#8514](https://github.com/yt-dlp/yt-dlp/issues/8514)) by [OIRNOIR](https://github.com/OIRNOIR)
- **zingmp3**: [Add support for radio and podcasts](https://github.com/yt-dlp/yt-dlp/commit/64de1a4c25bada90374b88d7353754fe8fbfcc51) ([#7189](https://github.com/yt-dlp/yt-dlp/issues/7189)) by [hatienl0i261299](https://github.com/hatienl0i261299)
#### Postprocessor changes
- **ffmpegmetadata**: [Embed stream metadata in single format downloads](https://github.com/yt-dlp/yt-dlp/commit/deeb13eae82e60f82a2c0c5861f460399a997528) ([#8647](https://github.com/yt-dlp/yt-dlp/issues/8647)) by [bashonly](https://github.com/bashonly)
#### Networking changes
- [Strip whitespace around header values](https://github.com/yt-dlp/yt-dlp/commit/196eb0fe77b78e2e5ca02c506c3837c2b1a7964c) ([#8802](https://github.com/yt-dlp/yt-dlp/issues/8802)) by [coletdjnz](https://github.com/coletdjnz)
- **Request Handler**: websockets: [Migrate websockets to networking framework](https://github.com/yt-dlp/yt-dlp/commit/ccfd70f4c24b579c72123ca76ab50164f8f122b7) ([#7720](https://github.com/yt-dlp/yt-dlp/issues/7720)) by [coletdjnz](https://github.com/coletdjnz)
#### Misc. changes
- **ci**
- [Concurrency optimizations](https://github.com/yt-dlp/yt-dlp/commit/f124fa458826308afc86cf364c509f857686ecfd) ([#8614](https://github.com/yt-dlp/yt-dlp/issues/8614)) by [Grub4K](https://github.com/Grub4K)
- [Run core tests only for core changes](https://github.com/yt-dlp/yt-dlp/commit/13b3cb3c2b7169a1e17d6fc62593bf744170521c) ([#8841](https://github.com/yt-dlp/yt-dlp/issues/8841)) by [Grub4K](https://github.com/Grub4K)
- **cleanup**
- [Fix spelling of `IE_NAME`](https://github.com/yt-dlp/yt-dlp/commit/bc4ab17b38f01000d99c5c2bedec89721fee65ec) ([#8810](https://github.com/yt-dlp/yt-dlp/issues/8810)) by [barsnick](https://github.com/barsnick)
- [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/9751a457cfdb18bf99d9ee0d10e4e6a594502bbf) ([#8604](https://github.com/yt-dlp/yt-dlp/issues/8604)) by [seproDev](https://github.com/seproDev)
- Miscellaneous: [f9fb3ce](https://github.com/yt-dlp/yt-dlp/commit/f9fb3ce86e3c6a0c3c33b45392b8d7288bceba76) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
- **devscripts**: `run_tests`: [Create Python script](https://github.com/yt-dlp/yt-dlp/commit/2d1d683a541d71f3d3bb999dfe8eeb1976fb91ce) ([#8720](https://github.com/yt-dlp/yt-dlp/issues/8720)) by [Grub4K](https://github.com/Grub4K) (With fixes in [225cf2b](https://github.com/yt-dlp/yt-dlp/commit/225cf2b830a1de2c5eacd257edd2a01aed1e1114))
- **docs**: [Update youtube-dl merge commit in `README.md`](https://github.com/yt-dlp/yt-dlp/commit/f10589e3453009bb523f55849bba144c9b91cf2a) by [bashonly](https://github.com/bashonly)
- **test**: networking: [Update tests for OpenSSL 3.2](https://github.com/yt-dlp/yt-dlp/commit/37755a037e612bfc608c3d4722e8ef2ce6a022ee) ([#8814](https://github.com/yt-dlp/yt-dlp/issues/8814)) by [bashonly](https://github.com/bashonly)
### 2023.11.16
#### Extractor changes
- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15cb3528cbda7b6198f49a6b5953c226d701696b) ([#8586](https://github.com/yt-dlp/yt-dlp/issues/8586)) by [bashonly](https://github.com/bashonly)
- **beatbump**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/21dc069bea2d4d99345dd969e098f4535c751d45) ([#8576](https://github.com/yt-dlp/yt-dlp/issues/8576)) by [seproDev](https://github.com/seproDev)
- **dailymotion**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a489f071508ec5caf5f32052d142afe86c28df7a) ([#7692](https://github.com/yt-dlp/yt-dlp/issues/7692)) by [TravisDupes](https://github.com/TravisDupes)
- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0783fd558ed0d3a8bc754beb75a406256f8b97b2) ([#8484](https://github.com/yt-dlp/yt-dlp/issues/8484)) by [almx](https://github.com/almx), [seproDev](https://github.com/seproDev)
- **eltrecetv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dcfad52812aa8ce007cefbfbe63f58b49f6b1046) ([#8216](https://github.com/yt-dlp/yt-dlp/issues/8216)) by [elivinsky](https://github.com/elivinsky)
- **jiosaavn**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b530118e7f48232cacf8050d79a6b20bdfcf5468) ([#8307](https://github.com/yt-dlp/yt-dlp/issues/8307)) by [awalgarg](https://github.com/awalgarg)
- **njpwworld**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/e569c2d1f4b665795a2b64f0aaf7f76930664233) ([#8570](https://github.com/yt-dlp/yt-dlp/issues/8570)) by [aarubui](https://github.com/aarubui)
- **tv5mondeplus**: [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/0f634dba3afdc429ece8839b02f6d56c27b7973a) ([#4209](https://github.com/yt-dlp/yt-dlp/issues/4209)) by [FrankZ85](https://github.com/FrankZ85)
- **twitcasting**: [Fix livestream detection](https://github.com/yt-dlp/yt-dlp/commit/2325d03aa7bb80f56ba52cd6992258e44727b424) ([#8574](https://github.com/yt-dlp/yt-dlp/issues/8574)) by [JC-Chung](https://github.com/JC-Chung)
- **zenyandex**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5efe68b73cbf6e907c2e6a3aa338664385084184) ([#8454](https://github.com/yt-dlp/yt-dlp/issues/8454)) by [starius](https://github.com/starius)
#### Misc. changes
- **build**: [Make `secretstorage` an optional dependency](https://github.com/yt-dlp/yt-dlp/commit/24f827875c6ba513f12ed09a3aef2bbed223760d) ([#8585](https://github.com/yt-dlp/yt-dlp/issues/8585)) by [bashonly](https://github.com/bashonly)
### 2023.11.14
#### Important changes
- **The release channels have been adjusted!**
* [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.
* [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes.
- Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)
- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers
#### Core changes
- [Add `--compat-option manifest-filesize-approx`](https://github.com/yt-dlp/yt-dlp/commit/10025b715ea01489557eb2c5a3cc04d361fcdb52) ([#8356](https://github.com/yt-dlp/yt-dlp/issues/8356)) by [bashonly](https://github.com/bashonly)
- [Fix format sorting with `--load-info-json`](https://github.com/yt-dlp/yt-dlp/commit/595ea4a99b726b8fe9463e7853b7053978d0544e) ([#8521](https://github.com/yt-dlp/yt-dlp/issues/8521)) by [bashonly](https://github.com/bashonly)
- [Include build origin in verbose output](https://github.com/yt-dlp/yt-dlp/commit/20314dd46f25e0e0a7e985a7804049aefa8b909f) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Only ensure playlist thumbnail dir if writing thumbs](https://github.com/yt-dlp/yt-dlp/commit/a40e0b37dfc8c26916b0e01aa3f29f3bc42250b6) ([#8373](https://github.com/yt-dlp/yt-dlp/issues/8373)) by [bashonly](https://github.com/bashonly)
- **update**: [Overhaul self-updater](https://github.com/yt-dlp/yt-dlp/commit/0b6ad22e6a432006a75df968f0283e6c6b3cfae6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Do not smuggle `http_headers`](https://github.com/yt-dlp/yt-dlp/commit/f04b5bedad7b281bee9814686bba1762bae092eb) by [coletdjnz](https://github.com/coletdjnz)
- [Do not test truth value of `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/d4f14a72dc1dd79396e0e80980268aee902b61e4) ([#8582](https://github.com/yt-dlp/yt-dlp/issues/8582)) by [bashonly](https://github.com/bashonly)
- **brilliantpala**: [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/9b5bedf13a3323074daceb0ec6ebb3cc6e0b9684) ([#8352](https://github.com/yt-dlp/yt-dlp/issues/8352)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **generic**: [Improve direct video link ext detection](https://github.com/yt-dlp/yt-dlp/commit/4ce2f29a50fcfb9920e6f2ffe42192945a2bad7e) ([#8340](https://github.com/yt-dlp/yt-dlp/issues/8340)) by [bashonly](https://github.com/bashonly)
- **laxarxames**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/312a2d1e8bc247264f9d85c5ec764e33aa0133b5) ([#8412](https://github.com/yt-dlp/yt-dlp/issues/8412)) by [aniolpages](https://github.com/aniolpages)
- **n-tv.de**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8afd9468b0c822843bc480d366d1c86698daabfb) ([#8414](https://github.com/yt-dlp/yt-dlp/issues/8414)) by [1100101](https://github.com/1100101)
- **neteasemusic**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/46acc418a53470b7f32581b3309c3cb87aa8488d) ([#8531](https://github.com/yt-dlp/yt-dlp/issues/8531)) by [LoserFox](https://github.com/LoserFox)
- **nhk**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/54579be4364e148277c32e20a5c3efc2c3f52f5b) ([#8388](https://github.com/yt-dlp/yt-dlp/issues/8388)) by [garret1317](https://github.com/garret1317)
- **novaembed**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3ff494f6f41c27549420fa88be27555bd449ffdc) ([#8368](https://github.com/yt-dlp/yt-dlp/issues/8368)) by [peci1](https://github.com/peci1)
- **npo**: [Send `POST` request to streams API endpoint](https://github.com/yt-dlp/yt-dlp/commit/8e02a4dcc800f9444e9d461edc41edd7b662f435) ([#8413](https://github.com/yt-dlp/yt-dlp/issues/8413)) by [bartbroere](https://github.com/bartbroere)
- **ondemandkorea**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/05adfd883a4f2ecae0267e670a62a2e45c351aeb) ([#8386](https://github.com/yt-dlp/yt-dlp/issues/8386)) by [seproDev](https://github.com/seproDev)
- **orf**: podcast: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6ba3085616652cbf05d1858efc321fdbfc4c6119) ([#8486](https://github.com/yt-dlp/yt-dlp/issues/8486)) by [Esokrates](https://github.com/Esokrates)
- **polskieradio**: audition: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/464327acdb353ceb91d2115163a5a9621b22fe0d) ([#8459](https://github.com/yt-dlp/yt-dlp/issues/8459)) by [shubhexists](https://github.com/shubhexists)
- **qdance**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/177f0d963e4b9db749805c482e6f288354c8be84) ([#8426](https://github.com/yt-dlp/yt-dlp/issues/8426)) by [bashonly](https://github.com/bashonly)
- **radiocomercial**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ef12dbdcd3e7264bd3d744c1e3107597bd23ad35) ([#8508](https://github.com/yt-dlp/yt-dlp/issues/8508)) by [SirElderling](https://github.com/SirElderling)
- **sbs.co.kr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/25a4bd345a0dcfece6fef752d4537eb403da94d9) ([#8326](https://github.com/yt-dlp/yt-dlp/issues/8326)) by [seproDev](https://github.com/seproDev)
- **theatercomplextown**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/2863fcf2b6876d0c7965ff7d6d9242eea653dc6b) ([#8560](https://github.com/yt-dlp/yt-dlp/issues/8560)) by [bashonly](https://github.com/bashonly)
- **thisav**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/cb480e390d85fb3a598c1b6d5eef3438ce729fc9) ([#8346](https://github.com/yt-dlp/yt-dlp/issues/8346)) by [bashonly](https://github.com/bashonly)
- **thisoldhouse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/c76c96677ff6a056f5844a568ef05ee22c46d6f4) ([#8561](https://github.com/yt-dlp/yt-dlp/issues/8561)) by [bashonly](https://github.com/bashonly)
- **twitcasting**: [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/7b8b1cf5eb8bf44ce70bc24e1f56f0dba2737e98) ([#8427](https://github.com/yt-dlp/yt-dlp/issues/8427)) by [JC-Chung](https://github.com/JC-Chung), [saintliao](https://github.com/saintliao)
- **twitter**
- broadcast
- [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7d337ca977d73a0a6c07ab481ed8faa8f6ff8726) ([#8383](https://github.com/yt-dlp/yt-dlp/issues/8383)) by [HitomaruKonpaku](https://github.com/HitomaruKonpaku)
- [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/f6e97090d2ed9e05441ab0f4bec3559b816d7a00) ([#8475](https://github.com/yt-dlp/yt-dlp/issues/8475)) by [bashonly](https://github.com/bashonly)
- **weibo**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15b252dfd2c6807fe57afc5a95e59abadb32ccd2) ([#8463](https://github.com/yt-dlp/yt-dlp/issues/8463)) by [c-basalt](https://github.com/c-basalt)
- **weverse**: [Fix login error handling](https://github.com/yt-dlp/yt-dlp/commit/4a601c9eff9fb42e24a4c8da3fa03628e035b35b) ([#8458](https://github.com/yt-dlp/yt-dlp/issues/8458)) by [seproDev](https://github.com/seproDev)
- **youtube**: [Check newly uploaded iOS HLS formats](https://github.com/yt-dlp/yt-dlp/commit/ef79d20dc9d27ac002a7196f073b37f2f2721aed) ([#8336](https://github.com/yt-dlp/yt-dlp/issues/8336)) by [bashonly](https://github.com/bashonly)
- **zoom**: [Extract combined view formats](https://github.com/yt-dlp/yt-dlp/commit/3906de07551fedb00b789345bf24cc27d6ddf128) ([#7847](https://github.com/yt-dlp/yt-dlp/issues/7847)) by [Mipsters](https://github.com/Mipsters)
#### Downloader changes
- **aria2c**: [Remove duplicate `--file-allocation=none`](https://github.com/yt-dlp/yt-dlp/commit/21b25281c51523620706b11bfc1c4a889858e1f2) ([#8332](https://github.com/yt-dlp/yt-dlp/issues/8332)) by [CrendKing](https://github.com/CrendKing)
- **dash**: [Force native downloader for `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/2622c804d1a5accc3045db398e0fc52074f4bdb3) ([#8339](https://github.com/yt-dlp/yt-dlp/issues/8339)) by [bashonly](https://github.com/bashonly)
#### Networking changes
- **Request Handler**: requests: [Add handler for `requests` HTTP library (#3668)](https://github.com/yt-dlp/yt-dlp/commit/8a8b54523addf46dfd50ef599761a81bc22362e6) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K) (With fixes in [4e38e2a](https://github.com/yt-dlp/yt-dlp/commit/4e38e2ae9d7380015349e6aee59c78bb3938befd))
Adds support for HTTPS proxies and persistent connections (keep-alive)
#### Misc. changes
- **build**
- [Include secretstorage in Linux builds](https://github.com/yt-dlp/yt-dlp/commit/9970d74c8383432c6c8779aa47d3253dcf412b14) by [bashonly](https://github.com/bashonly)
- [Overhaul and unify release workflow](https://github.com/yt-dlp/yt-dlp/commit/1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- **ci**
- [Bump `actions/checkout` to v4](https://github.com/yt-dlp/yt-dlp/commit/5438593a35b7b042fc48fe29cad0b9039f07c9bb) by [bashonly](https://github.com/bashonly)
- [Run core tests with dependencies](https://github.com/yt-dlp/yt-dlp/commit/700444c23ddb65f618c2abd942acdc0c58c650b1) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
- **cleanup**
- [Fix changelog typo](https://github.com/yt-dlp/yt-dlp/commit/a9d3f4b20a3533d2a40104c85bc2cc6c2564c800) by [bashonly](https://github.com/bashonly)
- [Update documentation for master and nightly channels](https://github.com/yt-dlp/yt-dlp/commit/a00af29853b8c7350ce086f4cab8c2c9cf2fcf1d) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- Miscellaneous: [b012271](https://github.com/yt-dlp/yt-dlp/commit/b012271d01b59759e4eefeab0308698cd9e7224c) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **test**: update: [Implement simple updater unit tests](https://github.com/yt-dlp/yt-dlp/commit/87264d4fdadcddd91289b968dd0e4bf58d449267) by [bashonly](https://github.com/bashonly)
### 2023.10.13
#### Core changes

View file

@ -29,6 +29,7 @@ ## [coletdjnz](https://github.com/coletdjnz)
[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz)
* Improved plugin architecture
* Rewrote the networking infrastructure, implemented support for `requests`
* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
* Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc
* Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc
@ -46,16 +47,17 @@ ## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>
## [bashonly](https://github.com/bashonly)
* `--update-to`, automated release, nightly builds
* `--cookies-from-browser` support for Firefox containers
* Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc
* Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc
* `--update-to`, self-updater rewrite, automated/nightly/master releases
* `--cookies-from-browser` support for Firefox containers, external downloader cookie handling overhaul
* Added support for new websites like Dacast, Kick, NBCStations, Triller, VideoKen, Weverse, WrestleUniverse etc
* Improved/fixed support for Anvato, Brightcove, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc
## [Grub4K](https://github.com/Grub4K)
[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) [![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K)
[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) [![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K)
* `--update-to`, automated release, nightly builds
* Rework internals like `traverse_obj`, various core refactors and bugs fixes
* Helped fix crunchyroll, Twitter, wrestleuniverse, wistia, slideslive etc
* `--update-to`, self-updater rewrite, automated/nightly/master releases
* Reworked internals like `traverse_obj`, various core refactors and bugs fixes
* Implemented proper progress reporting for parallel downloads
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc

View file

@ -76,7 +76,7 @@
# NEW FEATURES
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
@ -121,7 +121,7 @@ # NEW FEATURES
* **Self updater**: The releases can be updated using `yt-dlp -U`, and downgraded using `--update-to` if required
* **Nightly builds**: [Automated nightly builds](#update-channels) can be used with `--update-to nightly`
* **Automated builds**: [Nightly/master builds](#update-channels) can be used with `--update-to nightly` and `--update-to master`
See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes
@ -131,7 +131,7 @@ ### Differences in default behavior
Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc:
* yt-dlp supports only [Python 3.7+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743)
* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743)
* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
* `avconv` is not supported as an alternative to `ffmpeg`
* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
@ -159,14 +159,15 @@ ### Differences in default behavior
* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
* The sub-module `swfinterp` is removed.
For ease of use, a few more compat options are available:
* `--compat-options all`: Use all compat options (Do NOT use)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter`
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler`. Use this to enable all future compat options
* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
# INSTALLATION
@ -193,9 +194,11 @@ ## UPDATE
<a id="update-channels"/>
There are currently two release channels for binaries, `stable` and `nightly`.
`stable` is the default channel, and many of its changes have been tested by users of the nightly channel.
The `nightly` channel has releases built after each push to the master branch, and will have the most recent fixes and additions, but also have more risk of regressions. They are available in [their own repo](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases).
There are currently three release channels for binaries: `stable`, `nightly` and `master`.
* `stable` is the default channel, and many of its changes have been tested by users of the `nightly` and `master` channels.
* The `nightly` channel has releases scheduled to build every day around midnight UTC, for a snapshot of the project's new patches and changes. This is the **recommended channel for regular users** of yt-dlp. The `nightly` releases are available from [yt-dlp/yt-dlp-nightly-builds](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases) or as development releases of the `yt-dlp` PyPI package (which can be installed with pip's `--pre` flag).
* The `master` channel features releases that are built after each push to the master branch, and these will have the very latest fixes and additions, but may also be more prone to regressions. They are available from [yt-dlp/yt-dlp-master-builds](https://github.com/yt-dlp/yt-dlp-master-builds/releases).
When using `--update`/`-U`, a release binary will only update to its current channel.
`--update-to CHANNEL` can be used to switch to a different channel when a newer version is available. `--update-to [CHANNEL@]TAG` can also be used to upgrade or downgrade to specific tags from a channel.
@ -203,10 +206,19 @@ ## UPDATE
You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to though, there is no verification done for binaries from different repositories.
Example usage:
* `yt-dlp --update-to nightly` change to `nightly` channel and update to its latest release
* `yt-dlp --update-to stable@2023.02.17` upgrade/downgrade to release to `stable` channel tag `2023.02.17`
* `yt-dlp --update-to 2023.01.06` upgrade/downgrade to tag `2023.01.06` if it exists on the current channel
* `yt-dlp --update-to example/yt-dlp@2023.03.01` upgrade/downgrade to the release from the `example/yt-dlp` repository, tag `2023.03.01`
* `yt-dlp --update-to master` switch to the `master` channel and update to its latest release
* `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to release to `stable` channel tag `2023.07.06`
* `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel
* `yt-dlp --update-to example/yt-dlp@2023.09.24` upgrade/downgrade to the release from the `example/yt-dlp` repository, tag `2023.09.24`
**Important**: Any user experiencing an issue with the `stable` release should install or update to the `nightly` release before submitting a bug report:
```
# To update to nightly from stable executable/binary:
yt-dlp --update-to nightly
# To install nightly with pip:
python -m pip install -U --pre yt-dlp
```
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
## RELEASE FILES
@ -255,7 +267,7 @@ #### Misc
**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
## DEPENDENCIES
Python versions 3.7+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.
<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
@ -268,7 +280,7 @@ ### Strongly recommended
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
There are bugs in ffmpeg that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
**Important**: What you need is ffmpeg *binary*, **NOT** [the python package of the same name](https://pypi.org/project/ffmpeg)
@ -288,7 +300,7 @@ ### Misc
* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
* [**secretstorage**](https://github.com/mitya57/secretstorage) - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* Any external downloader that you want to use with `--downloader`
### Deprecated
@ -323,7 +335,7 @@ ### Standalone PyInstaller Builds
**Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly.
### Platform-independent Binary (UNIX)
You will need the build tools `python` (3.7+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
After installing these, simply run `make`.
@ -369,7 +381,8 @@ ## General Options:
CHANNEL can be a repository as well. CHANNEL
and TAG default to "stable" and "latest"
respectively if omitted; See "UPDATE" for
details. Supported channels: stable, nightly
details. Supported channels: stable,
nightly, master
-i, --ignore-errors Ignore download and postprocessing errors.
The download will be considered successful
even if the postprocessing fails
@ -1256,7 +1269,7 @@ # OUTPUT TEMPLATE
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; E.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
@ -1297,6 +1310,7 @@ # OUTPUT TEMPLATE
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
- `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
- `release_year` (numeric): Year (YYYY) when the video or album was released
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- `uploader_id` (string): Nickname or id of the video uploader
@ -1320,6 +1334,7 @@ # OUTPUT TEMPLATE
- `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
- `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- `extractor` (string): Name of the extractor
@ -1370,7 +1385,6 @@ # OUTPUT TEMPLATE
- `album_type` (string): Type of the album
- `album_artist` (string): List of all artists appeared on the album
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
- `release_year` (numeric): Year (YYYY) when the album was released
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@ -1874,6 +1888,9 @@ #### nhkradirulive (NHK らじる★らじる LIVE)
#### nflplusreplay
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default
#### jiosaavn
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`
**Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

View file

@ -98,5 +98,27 @@
"action": "add",
"when": "61bdf15fc7400601c3da1aa7a43917310a5bf391",
"short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands."
},
{
"action": "change",
"when": "8a8b54523addf46dfd50ef599761a81bc22362e6",
"short": "[rh:requests] Add handler for `requests` HTTP library (#3668)\n\n\tAdds support for HTTPS proxies and persistent connections (keep-alive)",
"authors": ["bashonly", "coletdjnz", "Grub4K"]
},
{
"action": "add",
"when": "1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa",
"short": "[priority] **The release channels have been adjusted!**\n\t* [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.\n\t* [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes."
},
{
"action": "add",
"when": "f04b5bedad7b281bee9814686bba1762bae092eb",
"short": "[priority] Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)\n\t- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers"
},
{
"action": "change",
"when": "15f22b4880b6b3f71f350c64d70976ae65b9f1ca",
"short": "[webvtt] Allow spaces before newlines for CueBlock (#7681)",
"authors": ["TSRBerry"]
}
]

View file

@ -40,20 +40,6 @@ def subgroup_lookup(cls):
return {
name: group
for group, names in {
cls.CORE: {
'aes',
'cache',
'compat_utils',
'compat',
'cookies',
'dependencies',
'formats',
'jsinterp',
'outtmpl',
'plugins',
'update',
'utils',
},
cls.MISC: {
'build',
'ci',
@ -404,9 +390,9 @@ def groups(self):
if not group:
if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
group = CommitGroup.EXTRACTOR
logger.error(f'Assuming [ie] group for {commit.short!r}')
else:
group = CommitGroup.POSTPROCESSOR
logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}')
group = CommitGroup.CORE
commit_info = CommitInfo(
details, sub_details, message.strip(),

View file

@ -9,12 +9,7 @@
import re
from devscripts.utils import (
get_filename_args,
read_file,
read_version,
write_file,
)
from devscripts.utils import get_filename_args, read_file, write_file
VERBOSE_TMPL = '''
- type: checkboxes
@ -35,19 +30,18 @@
description: |
It should start like this:
placeholder: |
[debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i']
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version %(version)s [9d339c4] (win32_exe)
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: %(version)s, Current version: %(version)s
yt-dlp is up to date (%(version)s)
[debug] Request Handlers: urllib, requests
[debug] Loaded 1893 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines>
render: shell
validations:
@ -66,7 +60,7 @@
def main():
fields = {'version': read_version(), 'no_skip': NO_SKIP}
fields = {'no_skip': NO_SKIP}
fields['verbose'] = VERBOSE_TMPL % fields
fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose'])

View file

@ -1,17 +1,4 @@
@setlocal
@echo off
cd /d %~dp0..
if ["%~1"]==[""] (
set "test_set="test""
) else if ["%~1"]==["core"] (
set "test_set="-m not download""
) else if ["%~1"]==["download"] (
set "test_set="-m "download""
) else (
echo.Invalid test type "%~1". Use "core" ^| "download"
exit /b 1
)
set PYTHONWARNINGS=error
pytest %test_set%
>&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead
python %~dp0run_tests.py %~1

71
devscripts/run_tests.py Executable file
View file

@ -0,0 +1,71 @@
#!/usr/bin/env python3
import argparse
import functools
import os
import re
import subprocess
import sys
from pathlib import Path
fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1')
def parse_args():
parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
parser.add_argument(
'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
parser.add_argument(
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
return parser.parse_args()
def run_tests(*tests, pattern=None, ci=False):
run_core = 'core' in tests or (not pattern and not tests)
run_download = 'download' in tests
tests = list(map(fix_test_name, tests))
arguments = ['pytest', '-Werror', '--tb=short']
if ci:
arguments.append('--color=yes')
if run_core:
arguments.extend(['-m', 'not download'])
elif run_download:
arguments.extend(['-m', 'download'])
elif pattern:
arguments.extend(['-k', pattern])
else:
arguments.extend(
f'test/test_download.py::TestDownload::test_{test}' for test in tests)
print(f'Running {arguments}', flush=True)
try:
return subprocess.call(arguments)
except FileNotFoundError:
pass
arguments = [sys.executable, '-Werror', '-m', 'unittest']
if run_core:
print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True)
return 1
elif run_download:
arguments.append('test.test_download')
elif pattern:
arguments.extend(['-k', pattern])
else:
arguments.extend(
f'test.test_download.TestDownload.test_{test}' for test in tests)
print(f'Running {arguments}', flush=True)
return subprocess.call(arguments)
if __name__ == '__main__':
try:
args = parse_args()
os.chdir(Path(__file__).parent.parent)
sys.exit(run_tests(*args.test, pattern=args.k, ci=bool(os.getenv('CI'))))
except KeyboardInterrupt:
pass

View file

@ -1,14 +1,4 @@
#!/usr/bin/env sh
if [ -z "$1" ]; then
test_set='test'
elif [ "$1" = 'core' ]; then
test_set="-m not download"
elif [ "$1" = 'download' ]; then
test_set="-m download"
else
echo 'Invalid test type "'"$1"'". Use "core" | "download"'
exit 1
fi
python3 -bb -Werror -m pytest "$test_set"
>&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead'
python3 devscripts/run_tests.py "$1"

View file

@ -1,39 +0,0 @@
#!/usr/bin/env python3
"""
Usage: python3 ./devscripts/update-formulae.py <path-to-formulae-rb> <version>
version can be either 0-aligned (yt-dlp version) or normalized (PyPi version)
"""
# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import json
import re
import urllib.request
from devscripts.utils import read_file, write_file
filename, version = sys.argv[1:]
normalized_version = '.'.join(str(int(x)) for x in version.split('.'))
pypi_release = json.loads(urllib.request.urlopen(
'https://pypi.org/pypi/yt-dlp/%s/json' % normalized_version
).read().decode())
tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('.tar.gz'))
sha256sum = tarball_file['digests']['sha256']
url = tarball_file['url']
formulae_text = read_file(filename)
formulae_text = re.sub(r'sha256 "[0-9a-f]*?"', 'sha256 "%s"' % sha256sum, formulae_text, count=1)
formulae_text = re.sub(r'url "[^"]*?"', 'url "%s"' % url, formulae_text, count=1)
write_file(filename, formulae_text)

View file

@ -20,7 +20,7 @@ def get_new_version(version, revision):
version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
if revision:
assert revision.isdigit(), 'Revision must be a number'
assert revision.isdecimal(), 'Revision must be a number'
else:
old_version = read_version().split('.')
if version.split('.') == old_version[:3]:
@ -46,6 +46,10 @@ def get_git_head():
UPDATE_HINT = None
CHANNEL = {channel!r}
ORIGIN = {origin!r}
_pkg_version = {package_version!r}
'''
if __name__ == '__main__':
@ -53,6 +57,12 @@ def get_git_head():
parser.add_argument(
'-c', '--channel', default='stable',
help='Select update channel (default: %(default)s)')
parser.add_argument(
'-r', '--origin', default='local',
help='Select origin/repository (default: %(default)s)')
parser.add_argument(
'-s', '--suffix', default='',
help='Add an alphanumeric suffix to the package version, e.g. "dev"')
parser.add_argument(
'-o', '--output', default='yt_dlp/version.py',
help='The output file to write to (default: %(default)s)')
@ -66,6 +76,7 @@ def get_git_head():
args.version if args.version and '.' in args.version
else get_new_version(None, args.version))
write_file(args.output, VERSION_TEMPLATE.format(
version=version, git_head=git_head, channel=args.channel))
version=version, git_head=git_head, channel=args.channel, origin=args.origin,
package_version=f'{version}{args.suffix}'))
print(f'version={version} ({args.channel}), head={git_head}')

View file

@ -13,10 +13,11 @@ def write_file(fname, content, mode='w'):
return f.write(content)
def read_version(fname='yt_dlp/version.py'):
def read_version(fname='yt_dlp/version.py', varname='__version__'):
"""Get the version without importing the package"""
exec(compile(read_file(fname), fname, 'exec'))
return locals()['__version__']
items = {}
exec(compile(read_file(fname), fname, 'exec'), items)
return items[varname]
def get_filename_args(has_infile=False, default_outfile=None):

View file

@ -1,8 +1,8 @@
mutagen
pycryptodomex
websockets
brotli; platform_python_implementation=='CPython'
brotlicffi; platform_python_implementation!='CPython'
brotli; implementation_name=='cpython'
brotlicffi; implementation_name!='cpython'
certifi
requests>=2.31.0,<3
urllib3>=1.26.17,<3
urllib3>=1.26.17,<3
websockets>=12.0

View file

@ -26,7 +26,7 @@ markers =
[tox:tox]
skipsdist = true
envlist = py{36,37,38,39,310,311},pypy{36,37,38,39}
envlist = py{38,39,310,311,312},pypy{38,39,310}
skip_missing_interpreters = true
[testenv] # tox
@ -39,7 +39,7 @@ setenv =
[isort]
py_version = 37
py_version = 38
multi_line_output = VERTICAL_HANGING_INDENT
line_length = 80
reverse_relative = true

View file

@ -18,7 +18,7 @@
from devscripts.utils import read_file, read_version
VERSION = read_version()
VERSION = read_version(varname='_pkg_version')
DESCRIPTION = 'A youtube-dl fork with additional features and patches'
@ -142,7 +142,7 @@ def main():
params = build_params()
setup(
name='yt-dlp',
name='yt-dlp', # package name (do not change/remove comment)
version=VERSION,
maintainer='pukkandan',
maintainer_email='pukkandan.ytdlp@gmail.com',
@ -152,7 +152,7 @@ def main():
url='https://github.com/yt-dlp/yt-dlp',
packages=packages(),
install_requires=REQUIREMENTS,
python_requires='>=3.7',
python_requires='>=3.8',
project_urls={
'Documentation': 'https://github.com/yt-dlp/yt-dlp#readme',
'Source': 'https://github.com/yt-dlp/yt-dlp',
@ -164,11 +164,11 @@ def main():
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'Programming Language :: Python',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: Implementation',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',

View file

@ -1,6 +1,4 @@
# Supported sites
- **0000studio:archive**
- **0000studio:clip**
- **17live**
- **17live:clip**
- **1News**: 1news.co.nz article videos
@ -9,7 +7,6 @@ # Supported sites
- **23video**
- **247sports**
- **24tv.ua**
- **24video**
- **3qsdn**: 3Q SDN
- **3sat**
- **4tube**
@ -50,15 +47,18 @@ # Supported sites
- **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com
- **afreecatv:user**
- **AirMozilla**
- **AirTV**
- **AitubeKZVideo**
- **AliExpressLive**
- **AlJazeera**
- **Allocine**
- **Allstar**
- **AllstarProfile**
- **AlphaPorno**
- **Alsace20TV**
- **Alsace20TVEmbed**
- **altcensored**
- **altcensored:channel**
- **Alura**: [*alura*](## "netrc machine")
- **AluraCourse**: [*aluracourse*](## "netrc machine")
- **Amara**
@ -79,7 +79,7 @@ # Supported sites
- **ant1newsgr:embed**: ant1news.gr embedded videos
- **antenna:watch**: antenna.gr and ant1news.gr videos
- **Anvato**
- **aol.com**: Yahoo screen and movies
- **aol.com**: Yahoo screen and movies (**Currently broken**)
- **APA**
- **Aparat**
- **AppleConnect**
@ -90,8 +90,8 @@ # Supported sites
- **archive.org**: archive.org video and audio
- **ArcPublishing**
- **ARD**
- **ARD:mediathek**
- **ARDBetaMediathek**
- **ARDMediathek**
- **ARDMediathekCollection**
- **Arkena**
- **arte.sky.it**
- **ArteTV**
@ -100,7 +100,6 @@ # Supported sites
- **ArteTVPlaylist**
- **AtresPlayer**: [*atresplayer*](## "netrc machine")
- **AtScaleConfEvent**
- **ATTTechChannel**
- **ATVAt**
- **AudiMedia**
- **AudioBoom**
@ -140,12 +139,12 @@ # Supported sites
- **BeatBumpVideo**
- **Beatport**
- **Beeg**
- **BehindKink**
- **BehindKink**: (**Currently broken**)
- **Bellator**
- **BellMedia**
- **BerufeTV**
- **Bet**
- **bfi:player**
- **Bet**: (**Currently broken**)
- **bfi:player**: (**Currently broken**)
- **bfmtv**
- **bfmtv:article**
- **bfmtv:live**
@ -162,6 +161,8 @@ # Supported sites
- **BiliBiliBangumi**
- **BiliBiliBangumiMedia**
- **BiliBiliBangumiSeason**
- **BilibiliCheese**
- **BilibiliCheeseSeason**
- **BilibiliCollectionList**
- **BilibiliFavoritesList**
- **BiliBiliPlayer**
@ -176,11 +177,8 @@ # Supported sites
- **BiliLive**
- **BioBioChileTV**
- **Biography**
- **BIQLE**
- **BitChute**
- **BitChuteChannel**
- **bitwave:replay**
- **bitwave:stream**
- **BlackboardCollaborate**
- **BleacherReport**
- **BleacherReportCMS**
@ -193,7 +191,7 @@ # Supported sites
- **Box**
- **BoxCastVideo**
- **Bpb**: Bundeszentrale für politische Bildung
- **BR**: Bayerischer Rundfunk
- **BR**: Bayerischer Rundfunk (**Currently broken**)
- **BrainPOP**: [*brainpop*](## "netrc machine")
- **BrainPOPELL**: [*brainpop*](## "netrc machine")
- **BrainPOPEsp**: [*brainpop*](## "netrc machine") BrainPOP Español
@ -201,19 +199,18 @@ # Supported sites
- **BrainPOPIl**: [*brainpop*](## "netrc machine") BrainPOP Hebrew
- **BrainPOPJr**: [*brainpop*](## "netrc machine")
- **BravoTV**
- **Break**
- **BreitBart**
- **brightcove:legacy**
- **brightcove:new**
- **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org
- **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
- **BRMediathek**: Bayerischer Rundfunk Mediathek
- **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **Bundesliga**
- **Bundestag**
- **BusinessInsider**
- **BuzzFeed**
- **BYUtv**
- **BYUtv**: (**Currently broken**)
- **CableAV**
- **Callin**
- **Caltrans**
@ -225,14 +222,11 @@ # Supported sites
- **CamModels**
- **Camsoda**
- **CamtasiaEmbed**
- **CamWithHer**
- **Canal1**
- **CanalAlpha**
- **canalc2.tv**
- **Canalplus**: mycanal.fr and piwiplus.fr
- **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
- **CarambaTV**
- **CarambaTVPage**
- **CartoonNetwork**
- **cbc.ca**
- **cbc.ca:player**
@ -254,16 +248,12 @@ # Supported sites
- **Cellebrite**
- **CeskaTelevize**
- **CGTN**
- **channel9**: Channel 9
- **CharlieRose**
- **Chaturbate**
- **Chilloutzone**
- **Chingari**
- **ChingariUser**
- **chirbit**
- **chirbit:profile**
- **cielotv.it**
- **Cinchcast**
- **Cinemax**
- **CinetecaMilano**
- **Cineverse**
@ -276,14 +266,12 @@ # Supported sites
- **cliphunter**
- **Clippit**
- **ClipRs**
- **Clipsyndicate**
- **ClipYouEmbed**
- **CloserToTruth**
- **CloudflareStream**
- **Cloudy**
- **Clubic**
- **Clubic**: (**Currently broken**)
- **Clyp**
- **cmt.com**
- **cmt.com**: (**Currently broken**)
- **CNBC**
- **CNBCVideo**
- **CNN**
@ -328,7 +316,6 @@ # Supported sites
- **CybraryCourse**: [*cybrary*](## "netrc machine")
- **DacastPlaylist**
- **DacastVOD**
- **Daftsex**
- **DagelijkseKost**: dagelijksekost.een.be
- **DailyMail**
- **dailymotion**: [*dailymotion*](## "netrc machine")
@ -347,13 +334,12 @@ # Supported sites
- **DctpTv**
- **DeezerAlbum**
- **DeezerPlaylist**
- **defense.gouv.fr**
- **democracynow**
- **DestinationAmerica**
- **DetikEmbed**
- **DeuxM**
- **DeuxMNews**
- **DHM**: Filmarchiv - Deutsches Historisches Museum
- **DHM**: Filmarchiv - Deutsches Historisches Museum (**Currently broken**)
- **Digg**
- **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor
- **DigitallySpeaking**
@ -373,7 +359,6 @@ # Supported sites
- **dlf:corpus**: DLF Multi-feed Archives
- **dlive:stream**
- **dlive:vod**
- **Dotsub**
- **Douyin**
- **DouyuShow**
- **DouyuTV**: 斗鱼直播
@ -392,34 +377,29 @@ # Supported sites
- **duboku**: www.duboku.io
- **duboku:list**: www.duboku.io entire series
- **Dumpert**
- **Duoplay**
- **dvtv**: http://video.aktualne.cz/
- **dw**
- **dw:article**
- **EaglePlatform**
- **EbaumsWorld**
- **Ebay**
- **EchoMsk**
- **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
- **ehftv**
- **eHow**
- **EinsUndEinsTV**: [*1und1tv*](## "netrc machine")
- **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine")
- **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine")
- **Einthusan**
- **eitb.tv**
- **ElevenSports**
- **EllenTube**
- **EllenTubePlaylist**
- **EllenTubeVideo**
- **Elonet**
- **ElPais**: El País
- **ElTreceTV**: El Trece TV (Argentina)
- **Embedly**
- **EMPFlix**
- **Engadget**
- **Epicon**
- **EpiconSeries**
- **eplus:inbound**: e+ (イープラス) overseas
- **EpidemicSound**
- **eplus**: [*eplus*](## "netrc machine") e+ (イープラス)
- **Epoch**
- **Eporner**
- **Erocast**
@ -428,11 +408,9 @@ # Supported sites
- **ertflix**: ERTFLIX videos
- **ertflix:codename**: ERTFLIX videos by codename
- **ertwebtv:embed**: ert.gr webtv embedded videos
- **Escapist**
- **ESPN**
- **ESPNArticle**
- **ESPNCricInfo**
- **EsriVideo**
- **EttuTv**
- **Europa**
- **EuroParlWebstream**
@ -442,9 +420,7 @@ # Supported sites
- **EWETV**: [*ewetv*](## "netrc machine")
- **EWETVLive**: [*ewetv*](## "netrc machine")
- **EWETVRecordings**: [*ewetv*](## "netrc machine")
- **ExpoTV**
- **Expressen**
- **ExtremeTube**
- **EyedoTV**
- **facebook**: [*facebook*](## "netrc machine")
- **facebook:reel**
@ -464,6 +440,8 @@ # Supported sites
- **FiveThirtyEight**
- **FiveTV**
- **Flickr**
- **Floatplane**
- **FloatplaneChannel**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FoodNetwork**
- **FootyRoom**
@ -471,7 +449,6 @@ # Supported sites
- **FOX**
- **FOX9**
- **FOX9News**
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
- **FoxNewsVideo**
@ -495,7 +472,6 @@ # Supported sites
- **funimation:show**: [*funimation*](## "netrc machine")
- **Funk**
- **Funker530**
- **Fusion**
- **Fux**
- **FuyinTV**
- **Gab**
@ -521,7 +497,6 @@ # Supported sites
- **GeniusLyrics**
- **Gettr**
- **GettrStreaming**
- **Gfycat**
- **GiantBomb**
- **Giga**
- **GlattvisionTV**: [*glattvisiontv*](## "netrc machine")
@ -563,7 +538,6 @@ # Supported sites
- **HearThisAt**
- **Heise**
- **HellPorno**
- **Helsinki**: helsinki.fi
- **hetklokhuis**
- **hgtv.com:show**
- **HGTVDe**
@ -572,8 +546,6 @@ # Supported sites
- **HistoricFilms**
- **history:player**
- **history:topic**: History.com Topic
- **hitbox**
- **hitbox:live**
- **HitRecord**
- **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
- **HollywoodReporter**
@ -584,8 +556,6 @@ # Supported sites
- **hotstar:playlist**
- **hotstar:season**
- **hotstar:series**
- **Howcast**
- **HowStuffWorks**
- **hrfernsehen**
- **HRTi**: [*hrti*](## "netrc machine")
- **HRTiPlaylist**: [*hrti*](## "netrc machine")
@ -607,7 +577,7 @@ # Supported sites
- **ign.com**
- **IGNArticle**
- **IGNVideo**
- **IHeartRadio**
- **iheartradio**
- **iheartradio:podcast**
- **Iltalehti**
- **imdb**: Internet Movie Database trailers
@ -637,7 +607,6 @@ # Supported sites
- **IsraelNationalNews**
- **ITProTV**
- **ITProTVCourse**
- **ITTF**
- **ITV**
- **ITVBTCC**
- **ivi**: ivi.ru
@ -654,7 +623,10 @@ # Supported sites
- **Jamendo**
- **JamendoAlbum**
- **JeuxVideo**
- **JioSaavnAlbum**
- **JioSaavnSong**
- **Joj**
- **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
- **Jove**
- **JStream**
- **JTBC**: jtbc.co.kr
@ -667,7 +639,6 @@ # Supported sites
- **Karaoketv**
- **KarriereVideos**
- **Katsomo**
- **KeezMovies**
- **KelbyOne**
- **Ketnet**
- **khanacademy**
@ -676,7 +647,7 @@ # Supported sites
- **Kicker**
- **KickStarter**
- **KickVOD**
- **KinjaEmbed**
- **kinja:embed**
- **KinoPoisk**
- **Kommunetv**
- **KompasVideo**
@ -695,11 +666,10 @@ # Supported sites
- **la7.it**
- **la7.it:pod:episode**
- **la7.it:podcast**
- **laola1tv**
- **laola1tv:embed**
- **LastFM**
- **LastFMPlaylist**
- **LastFMUser**
- **LaXarxaMes**: [*laxarxames*](## "netrc machine")
- **lbry**
- **lbry:channel**
- **lbry:playlist**
@ -729,7 +699,6 @@ # Supported sites
- **LinkedIn**: [*linkedin*](## "netrc machine")
- **linkedin:learning**: [*linkedin*](## "netrc machine")
- **linkedin:learning:course**: [*linkedin*](## "netrc machine")
- **LinuxAcademy**: [*linuxacademy*](## "netrc machine")
- **Liputan6**
- **ListenNotes**
- **LiTV**
@ -747,7 +716,7 @@ # Supported sites
- **Lumni**
- **lynda**: [*lynda*](## "netrc machine") lynda.com videos
- **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses
- **m6**
- **maariv.co.il**
- **MagellanTV**
- **MagentaMusik360**
- **mailru**: Видео@Mail.Ru
@ -789,11 +758,8 @@ # Supported sites
- **megatvcom:embed**: megatv.com embedded videos
- **Meipai**: 美拍
- **MelonVOD**
- **META**
- **metacafe**
- **Metacritic**
- **mewatch**
- **Mgoon**
- **MiaoPai**
- **MicrosoftEmbed**
- **microsoftstream**: Microsoft Stream
@ -806,7 +772,6 @@ # Supported sites
- **minds:group**
- **MinistryGrid**
- **Minoto**
- **miomio.tv**
- **mirrativ**
- **mirrativ:user**
- **MirrorCoUK**
@ -821,14 +786,10 @@ # Supported sites
- **MLBTV**: [*mlb*](## "netrc machine")
- **MLBVideo**
- **MLSSoccer**
- **Mnet**
- **MNetTV**: [*mnettv*](## "netrc machine")
- **MNetTVLive**: [*mnettv*](## "netrc machine")
- **MNetTVRecordings**: [*mnettv*](## "netrc machine")
- **MochaVideo**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex**
- **MofosexEmbed**
- **Mojvideo**
- **Monstercat**
- **MonsterSirenHypergryphMusic**
@ -839,13 +800,12 @@ # Supported sites
- **Motorsport**: motorsport.com
- **MotorTrend**
- **MotorTrendOnDemand**
- **MovieClips**
- **MovieFap**
- **Moviepilot**
- **MoviewPlay**
- **Moviezine**
- **MovingImage**
- **MSN**
- **MSN**: (**Currently broken**)
- **mtg**: MTG services
- **mtv**
- **mtv.de**
@ -867,18 +827,13 @@ # Supported sites
- **MusicdexSong**
- **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses
- **Mwave**
- **MwaveMeetGreet**
- **Mxplayer**
- **MxplayerShow**
- **MyChannels**
- **MySpace**
- **MySpace:album**
- **MySpass**
- **Myvi**
- **MyVideoGe**
- **MyVidster**
- **MyviEmbed**
- **Mzaalo**
- **n-tv.de**
- **N1Info:article**
@ -890,12 +845,12 @@ # Supported sites
- **Naver**
- **Naver:live**
- **navernow**
- **NBA**
- **nba**
- **nba:channel**
- **nba:embed**
- **nba:watch**
- **nba:watch:collection**
- **NBAChannel**
- **NBAEmbed**
- **NBAWatchEmbed**
- **nba:watch:embed**
- **NBC**
- **NBCNews**
- **nbcolympics**
@ -910,6 +865,7 @@ # Supported sites
- **NDTV**
- **Nebula**: [*watchnebula*](## "netrc machine")
- **nebula:channel**: [*watchnebula*](## "netrc machine")
- **nebula:class**: [*watchnebula*](## "netrc machine")
- **nebula:subscriptions**: [*watchnebula*](## "netrc machine")
- **NekoHacker**
- **NerdCubedFeed**
@ -931,7 +887,6 @@ # Supported sites
- **Newgrounds:playlist**
- **Newgrounds:user**
- **NewsPicks**
- **Newstube**
- **Newsy**
- **NextMedia**: 蘋果日報
- **NextMediaActionNews**: 蘋果日報 - 動新聞
@ -957,7 +912,6 @@ # Supported sites
- **nick.de**
- **nickelodeon:br**
- **nickelodeonru**
- **nicknight**
- **niconico**: [*niconico*](## "netrc machine") ニコニコ動画
- **niconico:history**: NicoNico user history or likes. Requires cookies.
- **niconico:live**: ニコニコ生放送
@ -975,15 +929,12 @@ # Supported sites
- **Nitter**
- **njoy**: N-JOY
- **njoy:embed**
- **NJPWWorld**: [*njpwworld*](## "netrc machine") 新日本プロレスワールド
- **NobelPrize**
- **NoicePodcast**
- **NonkTube**
- **NoodleMagazine**
- **Noovo**
- **Normalboots**
- **NOSNLArticle**
- **NosVideo**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed**
- **NovaPlay**
@ -1006,7 +957,7 @@ # Supported sites
- **NRKTVEpisodes**
- **NRKTVSeason**
- **NRKTVSeries**
- **NRLTV**
- **NRLTV**: (**Currently broken**)
- **ntv.ru**
- **NubilesPorn**: [*nubiles-porn*](## "netrc machine")
- **Nuvid**
@ -1026,6 +977,7 @@ # Supported sites
- **on24**: ON24
- **OnDemandChinaEpisode**
- **OnDemandKorea**
- **OnDemandKoreaProgram**
- **OneFootball**
- **OnePlacePodcast**
- **onet.pl**
@ -1033,8 +985,6 @@ # Supported sites
- **onet.tv:channel**
- **OnetMVP**
- **OnionStudios**
- **Ooyala**
- **OoyalaExternal**
- **Opencast**
- **OpencastPlaylist**
- **openrec**
@ -1043,6 +993,7 @@ # Supported sites
- **OraTV**
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
- **orf:podcast**
- **orf:radio**
- **orf:tvthek**: ORF TVthek
- **OsnatelTV**: [*osnateltv*](## "netrc machine")
@ -1055,7 +1006,6 @@ # Supported sites
- **PalcoMP3:artist**
- **PalcoMP3:song**
- **PalcoMP3:video**
- **pandora.tv**: 판도라TV
- **Panopto**
- **PanoptoList**
- **PanoptoPlaylist**
@ -1077,7 +1027,6 @@ # Supported sites
- **PeerTube:Playlist**
- **peloton**: [*peloton*](## "netrc machine")
- **peloton:live**: Peloton Live
- **People**
- **PerformGroup**
- **periscope**: Periscope
- **periscope:user**: Periscope user videos
@ -1099,14 +1048,11 @@ # Supported sites
- **PlanetMarathi**
- **Platzi**: [*platzi*](## "netrc machine")
- **PlatziCourse**: [*platzi*](## "netrc machine")
- **play.fm**
- **player.sky.it**
- **PlayPlusTV**: [*playplustv*](## "netrc machine")
- **PlayStuff**
- **PlaysTV**
- **PlaySuisse**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
- **PlayVids**
- **Playwire**
- **pluralsight**: [*pluralsight*](## "netrc machine")
@ -1131,11 +1077,8 @@ # Supported sites
- **Popcorntimes**
- **PopcornTV**
- **Pornbox**
- **PornCom**
- **PornerBros**
- **Pornez**
- **PornFlip**
- **PornHd**
- **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla
- **PornHubPagedVideoList**: [*pornhub*](## "netrc machine")
- **PornHubPlaylist**: [*pornhub*](## "netrc machine")
@ -1177,9 +1120,10 @@ # Supported sites
- **Radiko**
- **RadikoRadio**
- **radio.de**
- **radiobremen**
- **radiocanada**
- **radiocanada:audiovideo**
- **RadioComercial**
- **RadioComercialPlaylist**
- **radiofrance**
- **RadioFranceLive**
- **RadioFrancePodcast**
@ -1215,7 +1159,6 @@ # Supported sites
- **RCTIPlusSeries**
- **RCTIPlusTV**
- **RDS**: RDS.ca
- **Recurbate**
- **RedBull**
- **RedBullEmbed**
- **RedBullTV**
@ -1232,7 +1175,7 @@ # Supported sites
- **Reuters**
- **ReverbNation**
- **RheinMainTV**
- **RICE**
- **RinseFM**
- **RMCDecouverte**
- **RockstarGames**
- **Rokfin**: [*rokfin*](## "netrc machine")
@ -1253,8 +1196,6 @@ # Supported sites
- **rtl.lu:tele-vod**
- **rtl.nl**: rtl.nl and rtlxl.nl
- **rtl2**
- **rtl2:you**
- **rtl2:you:series**
- **RTLLuLive**
- **RTLLuRadio**
- **RTNews**
@ -1269,10 +1210,9 @@ # Supported sites
- **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams
- **rtve.es:television**
- **RTVNH**
- **RTVS**
- **rtvslo.si**
- **RUHD**
- **RudoVideo**
- **Rule34Video**
- **Rumble**
- **RumbleChannel**
@ -1306,6 +1246,9 @@ # Supported sites
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
- **sbs.co.kr**
- **sbs.co.kr:allvod_program**
- **sbs.co.kr:programs_vod**
- **schooltv**
- **ScienceChannel**
- **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix
@ -1316,8 +1259,8 @@ # Supported sites
- **ScrippsNetworks**
- **scrippsnetworks:watch**
- **Scrolller**
- **SCTE**: [*scte*](## "netrc machine")
- **SCTECourse**: [*scte*](## "netrc machine")
- **SCTE**: [*scte*](## "netrc machine") (**Currently broken**)
- **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**)
- **Seeker**
- **SenalColombiaLive**
- **SenateGov**
@ -1329,7 +1272,6 @@ # Supported sites
- **SeznamZpravyArticle**
- **Shahid**: [*shahid*](## "netrc machine")
- **ShahidShow**
- **Shared**: shared.sx
- **ShareVideosEmbed**
- **ShemarooMe**
- **ShowRoomLive**
@ -1381,7 +1323,6 @@ # Supported sites
- **SovietsClosetPlaylist**
- **SpankBang**
- **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Sport5**
- **SportBox**
@ -1394,7 +1335,7 @@ # Supported sites
- **SpreakerShowPage**
- **SpringboardPlatform**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
- **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **StacommuLive**: [*stacommu*](## "netrc machine")
@ -1411,7 +1352,6 @@ # Supported sites
- **StoryFireSeries**
- **StoryFireUser**
- **Streamable**
- **streamcloud.eu**
- **StreamCZ**
- **StreamFF**
- **StreetVoice**
@ -1427,7 +1367,6 @@ # Supported sites
- **SVTPlay**: SVT Play and Öppet arkiv
- **SVTSeries**
- **SwearnetEpisode**
- **SWRMediathek**
- **Syfy**
- **SYVDK**
- **SztvHu**
@ -1446,7 +1385,6 @@ # Supported sites
- **TeachingChannel**
- **Teamcoco**
- **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine")
- **TechTalks**
- **techtv.mit.edu**
- **TedEmbed**
- **TedPlaylist**
@ -1474,6 +1412,10 @@ # Supported sites
- **TenPlaySeason**
- **TF1**
- **TFO**
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
- **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine")
- **TheGuardianPodcast**
- **TheGuardianPodcastPlaylist**
- **TheHoleTv**
- **TheIntercept**
- **ThePlatform**
@ -1482,8 +1424,7 @@ # Supported sites
- **TheSun**
- **TheWeatherChannel**
- **ThisAmericanLife**
- **ThisAV**
- **ThisOldHouse**
- **ThisOldHouse**: [*thisoldhouse*](## "netrc machine")
- **ThisVid**
- **ThisVidMember**
- **ThisVidPlaylist**
@ -1495,27 +1436,23 @@ # Supported sites
- **tiktok:sound**: (**Currently broken**)
- **tiktok:tag**: (**Currently broken**)
- **tiktok:user**: (**Currently broken**)
- **tinypic**: tinypic.com videos
- **TLC**
- **TMZ**
- **TNAFlix**
- **TNAFlixNetworkEmbed**
- **toggle**
- **toggo**
- **Tokentube**
- **Tokentube:channel**
- **tokfm:audition**
- **tokfm:podcast**
- **ToonGoggles**
- **tou.tv**: [*toutv*](## "netrc machine")
- **Toypics**: Toypics video
- **ToypicsUser**: Toypics user profile
- **Toypics**: Toypics video (**Currently broken**)
- **ToypicsUser**: Toypics user profile (**Currently broken**)
- **TrailerAddict**: (**Currently broken**)
- **TravelChannel**
- **Triller**: [*triller*](## "netrc machine")
- **TrillerShort**
- **TrillerUser**: [*triller*](## "netrc machine")
- **Trilulilu**
- **Trovo**
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
- **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix
@ -1525,7 +1462,7 @@ # Supported sites
- **TruNews**
- **Truth**
- **TruTV**
- **Tube8**
- **Tube8**: (**Currently broken**)
- **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at
- **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine")
- **TubiTv**: [*tubitv*](## "netrc machine")
@ -1534,7 +1471,6 @@ # Supported sites
- **TuneInPodcast**
- **TuneInPodcastEpisode**
- **TuneInStation**
- **TunePk**
- **Turbo**
- **tv.dfb.de**
- **TV2**
@ -1558,14 +1494,7 @@ # Supported sites
- **TVIPlayer**
- **tvland.com**
- **TVN24**
- **TVNet**
- **TVNoe**
- **TVNow**
- **TVNowAnnual**
- **TVNowFilm**
- **TVNowNew**
- **TVNowSeason**
- **TVNowShow**
- **tvopengr:embed**: tvopen.gr embedded videos
- **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos
- **tvp**: Telewizja Polska
@ -1603,7 +1532,6 @@ # Supported sites
- **umg:de**: Universal Music Deutschland
- **Unistra**
- **Unity**
- **UnscriptedNewsVideo**
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
@ -1618,7 +1546,6 @@ # Supported sites
- **Utreon**
- **Varzesh3**
- **Vbox7**
- **VeeHD**
- **Veo**
- **Veoh**
- **veoh:user**
@ -1631,7 +1558,6 @@ # Supported sites
- **vice**
- **vice:article**
- **vice:show**
- **Vidbit**
- **Viddler**
- **Videa**
- **video.arnes.si**: Arnes Video
@ -1653,6 +1579,7 @@ # Supported sites
- **VidioLive**: [*vidio*](## "netrc machine")
- **VidioPremier**: [*vidio*](## "netrc machine")
- **VidLii**
- **Vidly**
- **viewlift**
- **viewlift:embed**
- **Viidea**
@ -1672,7 +1599,6 @@ # Supported sites
- **Vimm:stream**
- **ViMP**
- **ViMP:Playlist**
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
- **Viqeo**
@ -1680,7 +1606,6 @@ # Supported sites
- **viu:ott**: [*viu*](## "netrc machine")
- **viu:playlist**
- **ViuOTTIndonesia**
- **Vivo**: vivo.sx
- **vk**: [*vk*](## "netrc machine") VK
- **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos
- **vk:wallpost**: [*vk*](## "netrc machine")
@ -1688,37 +1613,27 @@ # Supported sites
- **VKPlayLive**
- **vm.tiktok**
- **Vocaroo**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
- **VoiceRepublic**
- **voicy**
- **voicy:channel**
- **VolejTV**
- **Voot**: [*voot*](## "netrc machine")
- **VootSeries**: [*voot*](## "netrc machine")
- **Voot**: [*voot*](## "netrc machine") (**Currently broken**)
- **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**)
- **VoxMedia**
- **VoxMediaVolume**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **vqq:series**
- **vqq:video**
- **Vrak**
- **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX
- **vrv**: [*vrv*](## "netrc machine")
- **vrv:series**
- **VShare**
- **VTM**
- **VTXTV**: [*vtxtv*](## "netrc machine")
- **VTXTVLive**: [*vtxtv*](## "netrc machine")
- **VTXTVRecordings**: [*vtxtv*](## "netrc machine")
- **VuClip**
- **Vupload**
- **VVVVID**
- **VVVVIDShow**
- **VyboryMos**
- **Vzaar**
- **Wakanim**
- **Walla**
- **WalyTV**: [*walytv*](## "netrc machine")
- **WalyTVLive**: [*walytv*](## "netrc machine")
@ -1729,9 +1644,7 @@ # Supported sites
- **washingtonpost**
- **washingtonpost:article**
- **wat.tv**
- **WatchBox**
- **WatchESPN**
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**: (**Currently broken**)
- **WDRElefant**
@ -1759,7 +1672,6 @@ # Supported sites
- **whowatch**
- **Whyp**
- **wikimedia.org**
- **Willow**
- **Wimbledon**
- **WimTV**
- **WinSportsVideo**
@ -1784,7 +1696,6 @@ # Supported sites
- **wykop:post**
- **wykop:post:comment**
- **Xanimu**
- **XBef**
- **XboxClips**
- **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing
- **XHamster**
@ -1796,9 +1707,6 @@ # Supported sites
- **XMinus**
- **XNXX**
- **Xstream**
- **XTube**
- **XTubeUser**: XTube user profile
- **Xuite**: 隨意窩Xuite影音
- **XVideos**
- **xvideos:quickies**
- **XXXYMovies**
@ -1815,10 +1723,7 @@ # Supported sites
- **YapFiles**
- **Yappy**
- **YappyProfile**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
- **YleAreena**
- **Ynet**
- **YouJizz**
- **youku**: 优酷
- **youku:show**
@ -1866,6 +1771,9 @@ # Supported sites
- **zingmp3:chart-home**
- **zingmp3:chart-music-video**
- **zingmp3:hub**
- **zingmp3:liveradio**
- **zingmp3:podcast**
- **zingmp3:podcast-episode**
- **zingmp3:user**
- **zingmp3:week-chart**
- **zoom**

View file

@ -19,3 +19,8 @@ def handler(request):
pytest.skip(f'{RH_KEY} request handler is not available')
return functools.partial(handler, logger=FakeLogger)
def validate_and_send(rh, req):
rh.validate(req)
return rh.send(req)

View file

@ -10,7 +10,7 @@
import yt_dlp.extractor
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, write_string
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
if 'pytest' in sys.modules:
import pytest
@ -214,14 +214,19 @@ def sanitize(key, value):
test_info_dict = {
key: sanitize(key, value) for key, value in got_dict.items()
if value is not None and key not in IGNORED_FIELDS and not any(
key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
if value is not None and key not in IGNORED_FIELDS and (
not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
or key == '_old_archive_ids')
}
# display_id may be generated from id
if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id')
# release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year')
# Check url for flat entries
if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
test_info_dict['url'] = got_dict['url']
@ -324,3 +329,8 @@ def http_server_port(httpd):
else:
sock = httpd.socket
return sock.getsockname()[1]
def verify_address_availability(address):
if find_available_port(address) is None:
pytest.skip(f'Unable to bind to source address {address} (address may not exist)')

View file

@ -140,6 +140,8 @@ def test(inp, *expected, multi=False):
test('example-with-dashes', 'example-with-dashes')
test('all', '2', '47', '45', 'example-with-dashes', '35')
test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
test('7_a/worst', '35')
def test_format_selection_audio(self):
formats = [
@ -728,7 +730,7 @@ def expect_same_infodict(out):
self.assertEqual(got_dict.get(info_field), expected, info_field)
return True
test('%()j', (expect_same_infodict, str))
test('%()j', (expect_same_infodict, None))
# NA placeholder
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
@ -797,6 +799,7 @@ def expect_same_infodict(out):
test('%(title|%)s %(title|%%)s', '% %%')
test('%(id+1-height+3)05d', '00158')
test('%(width+100)05d', 'NA')
test('%(filesize*8)d', '8192')
test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None))
test('%(formats.0)r', (repr(FORMATS[0]), None))
test('%(height.0)03d', '001')

View file

@ -26,7 +26,7 @@
from email.message import Message
from http.cookiejar import CookieJar
from test.helper import FakeYDL, http_server_port
from test.helper import FakeYDL, http_server_port, verify_address_availability
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, requests, urllib3
from yt_dlp.networking import (
@ -52,6 +52,8 @@
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
from yt_dlp.utils.networking import HTTPHeaderDict
from test.conftest import validate_and_send
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@ -178,6 +180,12 @@ def do_GET(self):
self.send_header('Location', '/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path == '/redirect_dotsegments_absolute':
self.send_response(301)
# redirect to /headers but with dot segments before - absolute url
self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers')
self.send_header('Content-Length', '0')
self.end_headers()
elif self.path.startswith('/redirect_'):
self._redirect()
elif self.path.startswith('/method'):
@ -275,11 +283,6 @@ def send_header(self, keyword, value):
self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
def validate_and_send(rh, req):
rh.validate(req)
return rh.send(req)
class TestRequestHandlerBase:
@classmethod
def setup_class(cls):
@ -331,7 +334,7 @@ def test_ssl_error(self, handler):
https_server_thread.start()
with handler(verify=False) as rh:
with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
assert not issubclass(exc_info.type, CertificateVerifyError)
@ -348,16 +351,17 @@ def test_percent_encode(self, handler):
res.close()
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_remove_dot_segments(self, handler):
with handler() as rh:
@pytest.mark.parametrize('path', [
'/a/b/./../../headers',
'/redirect_dotsegments',
# https://github.com/yt-dlp/yt-dlp/issues/9020
'/redirect_dotsegments_absolute',
])
def test_remove_dot_segments(self, handler, path):
with handler(verbose=True) as rh:
# This isn't a comprehensive test,
# but it should be enough to check whether the handler is removing dot segments
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/a/b/./../../headers'))
assert res.status == 200
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_dotsegments'))
# but it should be enough to check whether the handler is removing dot segments in required scenarios
res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}'))
assert res.status == 200
assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
res.close()
@ -541,6 +545,9 @@ def test_timeout(self, handler):
@pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
# on some systems these loopback addresses we need for testing may not be available
# see: https://github.com/yt-dlp/yt-dlp/issues/8890
verify_address_availability(source_address)
with handler(source_address=source_address) as rh:
data = validate_and_send(
rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode()
@ -872,8 +879,9 @@ def request(self, *args, **kwargs):
])
@pytest.mark.parametrize('handler', ['Requests'], indirect=True)
def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match):
from urllib3.response import HTTPResponse as Urllib3Response
from requests.models import Response as RequestsResponse
from urllib3.response import HTTPResponse as Urllib3Response
from yt_dlp.networking._requests import RequestsResponseAdapter
requests_res = RequestsResponse()
requests_res.raw = Urllib3Response(body=b'', status=200)
@ -929,13 +937,17 @@ class HTTPSupportedRH(ValidationRH):
('http', False, {}),
('https', False, {}),
]),
('Websockets', [
('ws', False, {}),
('wss', False, {}),
]),
(NoCheckRH, [('http', False, {})]),
(ValidationRH, [('http', UnsupportedRequest, {})])
]
PROXY_SCHEME_TESTS = [
# scheme, expected to fail
('Urllib', [
('Urllib', 'http', [
('http', False),
('https', UnsupportedRequest),
('socks4', False),
@ -944,7 +956,7 @@ class HTTPSupportedRH(ValidationRH):
('socks5h', False),
('socks', UnsupportedRequest),
]),
('Requests', [
('Requests', 'http', [
('http', False),
('https', False),
('socks4', False),
@ -952,8 +964,11 @@ class HTTPSupportedRH(ValidationRH):
('socks5', False),
('socks5h', False),
]),
(NoCheckRH, [('http', False)]),
(HTTPSupportedRH, [('http', UnsupportedRequest)]),
(NoCheckRH, 'http', [('http', False)]),
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
('Websockets', 'ws', [('http', UnsupportedRequest)]),
(NoCheckRH, 'http', [('http', False)]),
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
]
PROXY_KEY_TESTS = [
@ -972,7 +987,7 @@ class HTTPSupportedRH(ValidationRH):
]
EXTENSION_TESTS = [
('Urllib', [
('Urllib', 'http', [
({'cookiejar': 'notacookiejar'}, AssertionError),
({'cookiejar': YoutubeDLCookieJar()}, False),
({'cookiejar': CookieJar()}, AssertionError),
@ -980,17 +995,21 @@ class HTTPSupportedRH(ValidationRH):
({'timeout': 'notatimeout'}, AssertionError),
({'unsupported': 'value'}, UnsupportedRequest),
]),
('Requests', [
('Requests', 'http', [
({'cookiejar': 'notacookiejar'}, AssertionError),
({'cookiejar': YoutubeDLCookieJar()}, False),
({'timeout': 1}, False),
({'timeout': 'notatimeout'}, AssertionError),
({'unsupported': 'value'}, UnsupportedRequest),
]),
(NoCheckRH, [
(NoCheckRH, 'http', [
({'cookiejar': 'notacookiejar'}, False),
({'somerandom': 'test'}, False), # but any extension is allowed through
]),
('Websockets', 'ws', [
({'cookiejar': YoutubeDLCookieJar()}, False),
({'timeout': 2}, False),
]),
]
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
@ -1016,14 +1035,14 @@ def test_proxy_key(self, handler, proxy_key, fail):
run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
@pytest.mark.parametrize('handler,scheme,fail', [
(handler_tests[0], scheme, fail)
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
(handler_tests[0], handler_tests[1], scheme, fail)
for handler_tests in PROXY_SCHEME_TESTS
for scheme, fail in handler_tests[1]
for scheme, fail in handler_tests[2]
], indirect=['handler'])
def test_proxy_scheme(self, handler, scheme, fail):
run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'}))
run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'})
def test_proxy_scheme(self, handler, req_scheme, scheme, fail):
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True)
def test_empty_proxy(self, handler):
@ -1035,14 +1054,14 @@ def test_empty_proxy(self, handler):
def test_invalid_proxy_url(self, handler, proxy_url):
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
@pytest.mark.parametrize('handler,extensions,fail', [
(handler_tests[0], extensions, fail)
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
(handler_tests[0], handler_tests[1], extensions, fail)
for handler_tests in EXTENSION_TESTS
for extensions, fail in handler_tests[1]
for extensions, fail in handler_tests[2]
], indirect=['handler'])
def test_extension(self, handler, extensions, fail):
def test_extension(self, handler, scheme, extensions, fail):
run_validation(
handler, fail, Request('http://', extensions=extensions))
handler, fail, Request(f'{scheme}://', extensions=extensions))
def test_invalid_request_type(self):
rh = self.ValidationRH(logger=FakeLogger())
@ -1075,6 +1094,22 @@ def __init__(self, *args, **kwargs):
self._request_director = self.build_request_director([FakeRH])
class AllUnsupportedRHYDL(FakeYDL):
def __init__(self, *args, **kwargs):
class UnsupportedRH(RequestHandler):
def _send(self, request: Request):
pass
_SUPPORTED_FEATURES = ()
_SUPPORTED_PROXY_SCHEMES = ()
_SUPPORTED_URL_SCHEMES = ()
super().__init__(*args, **kwargs)
self._request_director = self.build_request_director([UnsupportedRH])
class TestRequestDirector:
def test_handler_operations(self):
@ -1234,6 +1269,12 @@ def test_file_urls_error(self):
with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'):
ydl.urlopen('file://')
@pytest.mark.parametrize('scheme', (['ws', 'wss']))
def test_websocket_unavailable_error(self, scheme):
with AllUnsupportedRHYDL() as ydl:
with pytest.raises(RequestError, match=r'This request requires WebSocket support'):
ydl.urlopen(f'{scheme}://')
def test_legacy_server_connect_error(self):
with FakeRHYDL() as ydl:
for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
@ -1293,6 +1334,10 @@ def test_clean_header(self):
assert 'Youtubedl-no-compression' not in rh.headers
assert rh.headers.get('Accept-Encoding') == 'identity'
with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl:
rh = self.build_handler(ydl)
assert 'Ytdl-socks-proxy' not in rh.headers
def test_build_handler_params(self):
with FakeYDL({
'http_headers': {'test': 'testtest'},

View file

@ -8,13 +8,9 @@
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import contextlib
import io
import platform
import random
import ssl
import urllib.error
import warnings
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import certifi
@ -30,7 +26,6 @@
from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
_CompatHTTPError,
)
from yt_dlp.socks import ProxyType
from yt_dlp.utils.networking import HTTPHeaderDict
@ -179,11 +174,10 @@ class TestNetworkingExceptions:
def create_response(status):
return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status)
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
def test_http_error(self, http_error_class):
def test_http_error(self):
response = self.create_response(403)
error = http_error_class(response)
error = HTTPError(response)
assert error.status == 403
assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
@ -194,80 +188,12 @@ def test_http_error(self, http_error_class):
assert data == b'test'
assert repr(error) == '<HTTPError 403: Forbidden>'
@pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
def test_redirect_http_error(self, http_error_class):
def test_redirect_http_error(self):
response = self.create_response(301)
error = http_error_class(response, redirect_loop=True)
error = HTTPError(response, redirect_loop=True)
assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
assert error.reason == 'Moved Permanently'
def test_compat_http_error(self):
response = self.create_response(403)
error = _CompatHTTPError(HTTPError(response))
assert isinstance(error, HTTPError)
assert isinstance(error, urllib.error.HTTPError)
@contextlib.contextmanager
def raises_deprecation_warning():
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
yield
if len(w) == 0:
pytest.fail('Did not raise DeprecationWarning')
if len(w) > 1:
pytest.fail(f'Raised multiple warnings: {w}')
if not issubclass(w[-1].category, DeprecationWarning):
pytest.fail(f'Expected DeprecationWarning, got {w[-1].category}')
w.clear()
with raises_deprecation_warning():
assert error.code == 403
with raises_deprecation_warning():
assert error.getcode() == 403
with raises_deprecation_warning():
assert error.hdrs is error.response.headers
with raises_deprecation_warning():
assert error.info() is error.response.headers
with raises_deprecation_warning():
assert error.headers is error.response.headers
with raises_deprecation_warning():
assert error.filename == error.response.url
with raises_deprecation_warning():
assert error.url == error.response.url
with raises_deprecation_warning():
assert error.geturl() == error.response.url
# Passthrough file operations
with raises_deprecation_warning():
assert error.read() == b'test'
with raises_deprecation_warning():
assert not error.closed
with raises_deprecation_warning():
# Technically Response operations are also passed through, which should not be used.
assert error.get_header('test') == 'test'
# Should not raise a warning
error.close()
@pytest.mark.skipif(
platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
def test_compat_http_error_autoclose(self):
# Compat HTTPError should not autoclose response
response = self.create_response(403)
_CompatHTTPError(HTTPError(response))
assert not response.closed
def test_incomplete_read_error(self):
error = IncompleteRead(4, 3, cause='test')
assert isinstance(error, IncompleteRead)

View file

@ -25,7 +25,7 @@
ThreadingTCPServer,
)
from test.helper import http_server_port
from test.helper import http_server_port, verify_address_availability
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import ProxyError, TransportError
from yt_dlp.socks import (
@ -210,6 +210,16 @@ def do_GET(self):
self.wfile.write(payload.encode())
class SocksWebSocketTestRequestHandler(SocksTestRequestHandler):
def handle(self):
import websockets.sync.server
protocol = websockets.ServerProtocol()
connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0)
connection.handshake()
connection.send(json.dumps(self.socks_info))
connection.close()
@contextlib.contextmanager
def socks_server(socks_server_class, request_handler, bind_ip=None, **socks_server_kwargs):
server = server_thread = None
@ -252,8 +262,22 @@ def socks_info_request(self, handler, target_domain=None, target_port=None, **re
return json.loads(handler.send(request).read().decode())
class WebSocketSocksTestProxyContext(SocksProxyTestContext):
REQUEST_HANDLER_CLASS = SocksWebSocketTestRequestHandler
def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
request = Request(f'ws://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs)
handler.validate(request)
ws = handler.send(request)
ws.send('socks_info')
socks_info = ws.recv()
ws.close()
return json.loads(socks_info)
CTX_MAP = {
'http': HTTPSocksTestProxyContext,
'ws': WebSocketSocksTestProxyContext,
}
@ -263,7 +287,7 @@ def ctx(request):
class TestSocks4Proxy:
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4_no_auth(self, handler, ctx):
with handler() as rh:
with ctx.socks_server(Socks4ProxyHandler) as server_address:
@ -271,7 +295,7 @@ def test_socks4_no_auth(self, handler, ctx):
rh, proxies={'all': f'socks4://{server_address}'})
assert response['version'] == 4
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4_auth(self, handler, ctx):
with handler() as rh:
with ctx.socks_server(Socks4ProxyHandler, user_id='user') as server_address:
@ -281,7 +305,7 @@ def test_socks4_auth(self, handler, ctx):
rh, proxies={'all': f'socks4://user:@{server_address}'})
assert response['version'] == 4
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4a_ipv4_target(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
@ -289,7 +313,7 @@ def test_socks4a_ipv4_target(self, handler, ctx):
assert response['version'] == 4
assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1')
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks4a_domain_target(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
with handler(proxies={'all': f'socks4a://{server_address}'}) as rh:
@ -298,17 +322,18 @@ def test_socks4a_domain_target(self, handler, ctx):
assert response['ipv4_address'] is None
assert response['domain_address'] == 'localhost'
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(proxies={'all': f'socks4://{server_address}'},
source_address=source_address) as rh:
response = ctx.socks_info_request(rh)
assert response['client_address'][0] == source_address
assert response['version'] == 4
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
@pytest.mark.parametrize('reply_code', [
Socks4CD.REQUEST_REJECTED_OR_FAILED,
Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD,
@ -320,7 +345,7 @@ def test_socks4_errors(self, handler, ctx, reply_code):
with pytest.raises(ProxyError):
ctx.socks_info_request(rh)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv6_socks4_proxy(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address:
with handler(proxies={'all': f'socks4://{server_address}'}) as rh:
@ -329,7 +354,7 @@ def test_ipv6_socks4_proxy(self, handler, ctx):
assert response['ipv4_address'] == '127.0.0.1'
assert response['version'] == 4
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_timeout(self, handler, ctx):
with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address:
with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh:
@ -339,7 +364,7 @@ def test_timeout(self, handler, ctx):
class TestSocks5Proxy:
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_no_auth(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@ -347,7 +372,7 @@ def test_socks5_no_auth(self, handler, ctx):
assert response['auth_methods'] == [0x0]
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_user_pass(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address:
with handler() as rh:
@ -360,7 +385,7 @@ def test_socks5_user_pass(self, handler, ctx):
assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS]
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_ipv4_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@ -368,7 +393,7 @@ def test_socks5_ipv4_target(self, handler, ctx):
assert response['ipv4_address'] == '127.0.0.1'
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_domain_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@ -376,7 +401,7 @@ def test_socks5_domain_target(self, handler, ctx):
assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1')
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5h_domain_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
@ -385,7 +410,7 @@ def test_socks5h_domain_target(self, handler, ctx):
assert response['domain_address'] == 'localhost'
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5h_ip_target(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5h://{server_address}'}) as rh:
@ -394,7 +419,7 @@ def test_socks5h_ip_target(self, handler, ctx):
assert response['domain_address'] is None
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_socks5_ipv6_destination(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@ -402,7 +427,7 @@ def test_socks5_ipv6_destination(self, handler, ctx):
assert response['ipv6_address'] == '::1'
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv6_socks5_proxy(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
@ -413,16 +438,17 @@ def test_ipv6_socks5_proxy(self, handler, ctx):
# XXX: is there any feasible way of testing IPv6 source addresses?
# Same would go for non-proxy source_address test...
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
def test_ipv4_client_source_address(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler) as server_address:
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(proxies={'all': f'socks5://{server_address}'}, source_address=source_address) as rh:
response = ctx.socks_info_request(rh)
assert response['client_address'][0] == source_address
assert response['version'] == 5
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws')], indirect=True)
@pytest.mark.parametrize('reply_code', [
Socks5Reply.GENERAL_FAILURE,
Socks5Reply.CONNECTION_NOT_ALLOWED,
@ -439,7 +465,7 @@ def test_socks5_errors(self, handler, ctx, reply_code):
with pytest.raises(ProxyError):
ctx.socks_info_request(rh)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http')], indirect=True)
@pytest.mark.parametrize('handler,ctx', [('Urllib', 'http'), ('Websockets', 'ws')], indirect=True)
def test_timeout(self, handler, ctx):
with ctx.socks_server(Socks5ProxyHandler, sleep=2) as server_address:
with handler(proxies={'all': f'socks5://{server_address}'}, timeout=1) as rh:

228
test/test_update.py Normal file
View file

@ -0,0 +1,228 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, report_warning
from yt_dlp.update import UpdateInfo, Updater
# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
'stable': 'yt-dlp/yt-dlp',
'nightly': 'yt-dlp/yt-dlp-nightly-builds',
'master': 'yt-dlp/yt-dlp-master-builds',
}
TEST_API_DATA = {
'yt-dlp/yt-dlp/latest': {
'tag_name': '2023.12.31',
'target_commitish': 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
'name': 'yt-dlp 2023.12.31',
'body': 'BODY',
},
'yt-dlp/yt-dlp-nightly-builds/latest': {
'tag_name': '2023.12.31.123456',
'target_commitish': 'master',
'name': 'yt-dlp nightly 2023.12.31.123456',
'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/cccccccccccccccccccccccccccccccccccccccc',
},
'yt-dlp/yt-dlp-master-builds/latest': {
'tag_name': '2023.12.31.987654',
'target_commitish': 'master',
'name': 'yt-dlp master 2023.12.31.987654',
'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/dddddddddddddddddddddddddddddddddddddddd',
},
'yt-dlp/yt-dlp/tags/testing': {
'tag_name': 'testing',
'target_commitish': '9999999999999999999999999999999999999999',
'name': 'testing',
'body': 'BODY',
},
'fork/yt-dlp/latest': {
'tag_name': '2050.12.31',
'target_commitish': 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
'name': '2050.12.31',
'body': 'BODY',
},
'fork/yt-dlp/tags/pr0000': {
'tag_name': 'pr0000',
'target_commitish': 'ffffffffffffffffffffffffffffffffffffffff',
'name': 'pr1234 2023.11.11.000000',
'body': 'BODY',
},
'fork/yt-dlp/tags/pr1234': {
'tag_name': 'pr1234',
'target_commitish': '0000000000000000000000000000000000000000',
'name': 'pr1234 2023.12.31.555555',
'body': 'BODY',
},
'fork/yt-dlp/tags/pr9999': {
'tag_name': 'pr9999',
'target_commitish': '1111111111111111111111111111111111111111',
'name': 'pr9999',
'body': 'BODY',
},
'fork/yt-dlp-satellite/tags/pr987': {
'tag_name': 'pr987',
'target_commitish': 'master',
'name': 'pr987',
'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/2222222222222222222222222222222222222222',
},
}
TEST_LOCKFILE_COMMENT = '# This file is used for regulating self-update'
TEST_LOCKFILE_V1 = r'''%s
lock 2022.08.18.36 .+ Python 3\.6
lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
''' % TEST_LOCKFILE_COMMENT
TEST_LOCKFILE_V2_TMPL = r'''%s
lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
'''
TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT
TEST_LOCKFILE_ACTUAL = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_V1.rstrip('\n')
TEST_LOCKFILE_FORK = r'''%s# Test if a fork blocks updates to non-numeric tags
lockV2 fork/yt-dlp pr0000 .+ Python 3.6
lockV2 fork/yt-dlp pr1234 (?!win_x86_exe).+ Python 3\.7
lockV2 fork/yt-dlp pr1234 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 fork/yt-dlp pr9999 .+ Python 3.11
''' % TEST_LOCKFILE_ACTUAL
class FakeUpdater(Updater):
current_version = '2022.01.01'
current_commit = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
_channel = 'stable'
_origin = 'yt-dlp/yt-dlp'
_update_sources = TEST_UPDATE_SOURCES
def _download_update_spec(self, *args, **kwargs):
return TEST_LOCKFILE_ACTUAL
def _call_api(self, tag):
tag = f'tags/{tag}' if tag != 'latest' else tag
return TEST_API_DATA[f'{self.requested_repo}/{tag}']
def _report_error(self, msg, *args, **kwargs):
report_warning(msg)
class TestUpdate(unittest.TestCase):
maxDiff = None
def test_update_spec(self):
ydl = FakeYDL()
updater = FakeUpdater(ydl, 'stable')
def test(lockfile, identifier, input_tag, expect_tag, exact=False, repo='yt-dlp/yt-dlp'):
updater._identifier = identifier
updater._exact = exact
updater.requested_repo = repo
result = updater._process_update_spec(lockfile, input_tag)
self.assertEqual(
result, expect_tag,
f'{identifier!r} requesting {repo}@{input_tag} (exact={exact}) '
f'returned {result!r} instead of {expect_tag!r}')
for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK):
# Normal operation
test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31')
test(lockfile, 'zip stable Python 3.12.0', '2023.12.31', '2023.12.31', exact=True)
# Python 3.6 --update should update only to its lock
test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36')
# --update-to an exact version later than the lock should return None
test(lockfile, 'zip stable Python 3.6.0', '2023.11.16', None, exact=True)
# Python 3.7 should be able to update to its lock
test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16')
test(lockfile, 'zip stable Python 3.7.1', '2023.11.16', '2023.11.16', exact=True)
# Non-win_x86_exe builds on py3.7 must be locked
test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16')
test(lockfile, 'zip stable Python 3.7.1', '2023.12.31', None, exact=True)
test( # Windows Vista w/ win_x86_exe must be locked
lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
'2023.12.31', '2023.11.16')
test( # Windows 2008Server w/ win_x86_exe must be locked
lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server',
'2023.12.31', None, exact=True)
test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond lock
lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
'2023.12.31', '2023.12.31')
test( # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond lock
lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200',
'2023.12.31', '2023.12.31', exact=True)
# Forks can block updates to non-numeric tags rather than lock
test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True)
test(
TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
'pr1234', None, repo='fork/yt-dlp')
test(
TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
'2023.12.31', '2023.12.31', repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True)
test(TEST_LOCKFILE_FORK, 'zip stable Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp')
def test_query_update(self):
ydl = FakeYDL()
def test(target, expected, current_version=None, current_commit=None, identifier=None):
updater = FakeUpdater(ydl, target)
if current_version:
updater.current_version = current_version
if current_commit:
updater.current_commit = current_commit
updater._identifier = identifier or 'zip'
update_info = updater.query_update(_output=True)
self.assertDictEqual(
update_info.__dict__ if update_info else {}, expected.__dict__ if expected else {})
test('yt-dlp/yt-dlp@latest', UpdateInfo(
'2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40))
test('yt-dlp/yt-dlp-nightly-builds@latest', UpdateInfo(
'2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40))
test('yt-dlp/yt-dlp-master-builds@latest', UpdateInfo(
'2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40))
test('fork/yt-dlp@latest', UpdateInfo(
'2050.12.31', version='2050.12.31', requested_version='2050.12.31', commit='e' * 40))
test('fork/yt-dlp@pr0000', UpdateInfo(
'pr0000', version='2023.11.11.000000', requested_version='2023.11.11.000000', commit='f' * 40))
test('fork/yt-dlp@pr1234', UpdateInfo(
'pr1234', version='2023.12.31.555555', requested_version='2023.12.31.555555', commit='0' * 40))
test('fork/yt-dlp@pr9999', UpdateInfo(
'pr9999', version=None, requested_version=None, commit='1' * 40))
test('fork/yt-dlp-satellite@pr987', UpdateInfo(
'pr987', version=None, requested_version=None, commit='2' * 40))
test('yt-dlp/yt-dlp', None, current_version='2024.01.01')
test('stable', UpdateInfo(
'2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40))
test('nightly', UpdateInfo(
'2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40))
test('master', UpdateInfo(
'2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40))
test('testing', None, current_commit='9' * 40)
test('testing', UpdateInfo('testing', commit='9' * 40))
if __name__ == '__main__':
unittest.main()

View file

@ -1,30 +0,0 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import json
from yt_dlp.update import rsa_verify
class TestUpdate(unittest.TestCase):
def test_rsa_verify(self):
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'versions.json'), 'rb') as f:
versions_info = f.read().decode()
versions_info = json.loads(versions_info)
signature = versions_info['signature']
del versions_info['signature']
self.assertTrue(rsa_verify(
json.dumps(versions_info, sort_keys=True).encode(),
signature, UPDATES_RSA_KEY))
if __name__ == '__main__':
unittest.main()

View file

@ -2110,6 +2110,8 @@ def test_traverse_obj(self):
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})),
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
msg='Function in set should be a transformation')
self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const',
msg='Function in set should always be called')
if __debug__:
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
traverse_obj(_TEST_DATA, set())
@ -2317,23 +2319,6 @@ def test_traverse_obj(self):
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
msg='branching should result in list if `traverse_string`')
# Test is_user_input behavior
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
self.assertEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3'),
is_user_input=True), 3,
msg='allow for string indexing if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', '3:'),
is_user_input=True), tuple(range(8))[3:],
msg='allow for string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':4:2'),
is_user_input=True), tuple(range(8))[:4:2],
msg='allow step in string slice if `is_user_input`')
self.assertCountEqual(traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':'),
is_user_input=True), range(8),
msg='`:` should be treated as `...` if `is_user_input`')
with self.assertRaises(TypeError, msg='too many params should result in error'):
traverse_obj(_IS_USER_INPUT_DATA, ('range8', ':::'), is_user_input=True)
# Test re.Match as input obj
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
@ -2355,6 +2340,58 @@ def test_traverse_obj(self):
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
msg='function on a `re.Match` should give group name as well')
# Test xml.etree.ElementTree.Element as input obj
etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>''')
self.assertEqual(traverse_obj(etree, ''), etree,
msg='empty str key should return the element itself')
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
msg='str key should lead all children with that tag name')
self.assertEqual(traverse_obj(etree, ...), list(etree),
msg='`...` as key should return all children')
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
msg='function as key should get element as value')
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
msg='function as key should get index as key')
self.assertEqual(traverse_obj(etree, 0), etree[0],
msg='int key should return the nth child')
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
msg='`@<attribute>` at end of path should give that attribute')
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
msg='`@<nonexistant>` at end of path should give `None`')
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
msg='`@` should give the full attribute dict')
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
msg='`text()` at end of path should give the inner text')
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
msg='full python xpath features should be supported')
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
msg='special transformations should act on current element')
self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
msg='special transformations should act on current element')
def test_http_header_dict(self):
headers = HTTPHeaderDict()
headers['ytdl-test'] = b'0'
@ -2387,6 +2424,11 @@ def test_http_header_dict(self):
headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})
# common mistake: strip whitespace from values
# https://github.com/yt-dlp/yt-dlp/issues/8729
headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})
def test_extract_basic_auth(self):
assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)

383
test/test_websockets.py Normal file
View file

@ -0,0 +1,383 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import pytest
from test.helper import verify_address_availability
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import http.client
import http.cookiejar
import http.server
import json
import random
import ssl
import threading
from yt_dlp import socks
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import websockets
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import (
CertificateVerifyError,
HTTPError,
ProxyError,
RequestError,
SSLError,
TransportError,
)
from yt_dlp.utils.networking import HTTPHeaderDict
from test.conftest import validate_and_send
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def websocket_handler(websocket):
for message in websocket:
if isinstance(message, bytes):
if message == b'bytes':
return websocket.send('2')
elif isinstance(message, str):
if message == 'headers':
return websocket.send(json.dumps(dict(websocket.request.headers)))
elif message == 'path':
return websocket.send(websocket.request.path)
elif message == 'source_address':
return websocket.send(websocket.remote_address[0])
elif message == 'str':
return websocket.send('1')
return websocket.send(message)
def process_request(self, request):
if request.path.startswith('/gen_'):
status = http.HTTPStatus(int(request.path[5:]))
if 300 <= status.value <= 300:
return websockets.http11.Response(
status.value, status.phrase, websockets.datastructures.Headers([('Location', '/')]), b'')
return self.protocol.reject(status.value, status.phrase)
return self.protocol.accept(request)
def create_websocket_server(**ws_kwargs):
import websockets.sync.server
wsd = websockets.sync.server.serve(websocket_handler, '127.0.0.1', 0, process_request=process_request, **ws_kwargs)
ws_port = wsd.socket.getsockname()[1]
ws_server_thread = threading.Thread(target=wsd.serve_forever)
ws_server_thread.daemon = True
ws_server_thread.start()
return ws_server_thread, ws_port
def create_ws_websocket_server():
return create_websocket_server()
def create_wss_websocket_server():
certfn = os.path.join(TEST_DIR, 'testcert.pem')
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.load_cert_chain(certfn, None)
return create_websocket_server(ssl_context=sslctx)
MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate')
def create_mtls_wss_websocket_server():
certfn = os.path.join(TEST_DIR, 'testcert.pem')
cacertfn = os.path.join(MTLS_CERT_DIR, 'ca.crt')
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.verify_mode = ssl.CERT_REQUIRED
sslctx.load_verify_locations(cafile=cacertfn)
sslctx.load_cert_chain(certfn, None)
return create_websocket_server(ssl_context=sslctx)
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
class TestWebsSocketRequestHandlerConformance:
@classmethod
def setup_class(cls):
cls.ws_thread, cls.ws_port = create_ws_websocket_server()
cls.ws_base_url = f'ws://127.0.0.1:{cls.ws_port}'
cls.wss_thread, cls.wss_port = create_wss_websocket_server()
cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}'
cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER))
cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}'
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_basic_websockets(self, handler):
with handler() as rh:
ws = validate_and_send(rh, Request(self.ws_base_url))
assert 'upgrade' in ws.headers
assert ws.status == 101
ws.send('foo')
assert ws.recv() == 'foo'
ws.close()
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_send_types(self, handler, msg, opcode):
with handler() as rh:
ws = validate_and_send(rh, Request(self.ws_base_url))
ws.send(msg)
assert int(ws.recv()) == opcode
ws.close()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_verify_cert(self, handler):
with handler() as rh:
with pytest.raises(CertificateVerifyError):
validate_and_send(rh, Request(self.wss_base_url))
with handler(verify=False) as rh:
ws = validate_and_send(rh, Request(self.wss_base_url))
assert ws.status == 101
ws.close()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_ssl_error(self, handler):
with handler(verify=False) as rh:
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
validate_and_send(rh, Request(self.bad_wss_host))
assert not issubclass(exc_info.type, CertificateVerifyError)
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('path,expected', [
# Unicode characters should be encoded with uppercase percent-encoding
('/中文', '/%E4%B8%AD%E6%96%87'),
# don't normalize existing percent encodings
('/%c7%9f', '/%c7%9f'),
])
def test_percent_encode(self, handler, path, expected):
with handler() as rh:
ws = validate_and_send(rh, Request(f'{self.ws_base_url}{path}'))
ws.send('path')
assert ws.recv() == expected
assert ws.status == 101
ws.close()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_remove_dot_segments(self, handler):
with handler() as rh:
# This isn't a comprehensive test,
# but it should be enough to check whether the handler is removing dot segments
ws = validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test'))
assert ws.status == 101
ws.send('path')
assert ws.recv() == '/test'
ws.close()
# We are restricted to known HTTP status codes in http.HTTPStatus
# Redirects are not supported for websockets
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
def test_raise_http_error(self, handler, status):
with handler() as rh:
with pytest.raises(HTTPError) as exc_info:
validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
assert exc_info.value.status == status
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('params,extensions', [
({'timeout': 0.00001}, {}),
({}, {'timeout': 0.00001}),
])
def test_timeout(self, handler, params, extensions):
with handler(**params) as rh:
with pytest.raises(TransportError):
validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_cookies(self, handler):
cookiejar = YoutubeDLCookieJar()
cookiejar.set_cookie(http.cookiejar.Cookie(
version=0, name='test', value='ytdlp', port=None, port_specified=False,
domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
path_specified=True, secure=False, expires=None, discard=False, comment=None,
comment_url=None, rest={}))
with handler(cookiejar=cookiejar) as rh:
ws = validate_and_send(rh, Request(self.ws_base_url))
ws.send('headers')
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
ws.close()
with handler() as rh:
ws = validate_and_send(rh, Request(self.ws_base_url))
ws.send('headers')
assert 'cookie' not in json.loads(ws.recv())
ws.close()
ws = validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar}))
ws.send('headers')
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
ws.close()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_source_address(self, handler):
source_address = f'127.0.0.{random.randint(5, 255)}'
verify_address_availability(source_address)
with handler(source_address=source_address) as rh:
ws = validate_and_send(rh, Request(self.ws_base_url))
ws.send('source_address')
assert source_address == ws.recv()
ws.close()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_response_url(self, handler):
with handler() as rh:
url = f'{self.ws_base_url}/something'
ws = validate_and_send(rh, Request(url))
assert ws.url == url
ws.close()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_request_headers(self, handler):
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
# Global Headers
ws = validate_and_send(rh, Request(self.ws_base_url))
ws.send('headers')
headers = HTTPHeaderDict(json.loads(ws.recv()))
assert headers['test1'] == 'test'
ws.close()
# Per request headers, merged with global
ws = validate_and_send(rh, Request(
self.ws_base_url, headers={'test2': 'changed', 'test3': 'test3'}))
ws.send('headers')
headers = HTTPHeaderDict(json.loads(ws.recv()))
assert headers['test1'] == 'test'
assert headers['test2'] == 'changed'
assert headers['test3'] == 'test3'
ws.close()
@pytest.mark.parametrize('client_cert', (
{'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithkey.crt')},
{
'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'),
'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'client.key'),
},
{
'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithencryptedkey.crt'),
'client_certificate_password': 'foobar',
},
{
'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'),
'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'clientencrypted.key'),
'client_certificate_password': 'foobar',
}
))
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
def test_mtls(self, handler, client_cert):
with handler(
# Disable client-side validation of unacceptable self-signed testcert.pem
# The test is of a check on the server side, so unaffected
verify=False,
client_cert=client_cert
) as rh:
validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
def create_fake_ws_connection(raised):
import websockets.sync.client
class FakeWsConnection(websockets.sync.client.ClientConnection):
def __init__(self, *args, **kwargs):
class FakeResponse:
body = b''
headers = {}
status_code = 101
reason_phrase = 'test'
self.response = FakeResponse()
def send(self, *args, **kwargs):
raise raised()
def recv(self, *args, **kwargs):
raise raised()
def close(self, *args, **kwargs):
return
return FakeWsConnection()
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
class TestWebsocketsRequestHandler:
@pytest.mark.parametrize('raised,expected', [
# https://websockets.readthedocs.io/en/stable/reference/exceptions.html
(lambda: websockets.exceptions.InvalidURI(msg='test', uri='test://'), RequestError),
# Requires a response object. Should be covered by HTTP error tests.
# (lambda: websockets.exceptions.InvalidStatus(), TransportError),
(lambda: websockets.exceptions.InvalidHandshake(), TransportError),
# These are subclasses of InvalidHandshake
(lambda: websockets.exceptions.InvalidHeader(name='test'), TransportError),
(lambda: websockets.exceptions.NegotiationError(), TransportError),
# Catch-all
(lambda: websockets.exceptions.WebSocketException(), TransportError),
(lambda: TimeoutError(), TransportError),
# These may be raised by our create_connection implementation, which should also be caught
(lambda: OSError(), TransportError),
(lambda: ssl.SSLError(), SSLError),
(lambda: ssl.SSLCertVerificationError(), CertificateVerifyError),
(lambda: socks.ProxyError(), ProxyError),
])
def test_request_error_mapping(self, handler, monkeypatch, raised, expected):
import websockets.sync.client
import yt_dlp.networking._websockets
with handler() as rh:
def fake_connect(*args, **kwargs):
raise raised()
monkeypatch.setattr(yt_dlp.networking._websockets, 'create_connection', lambda *args, **kwargs: None)
monkeypatch.setattr(websockets.sync.client, 'connect', fake_connect)
with pytest.raises(expected) as exc_info:
rh.send(Request('ws://fake-url'))
assert exc_info.type is expected
@pytest.mark.parametrize('raised,expected,match', [
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
(lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None),
(lambda: RuntimeError(), TransportError, None),
(lambda: TimeoutError(), TransportError, None),
(lambda: TypeError(), RequestError, None),
(lambda: socks.ProxyError(), ProxyError, None),
# Catch-all
(lambda: websockets.exceptions.WebSocketException(), TransportError, None),
])
def test_ws_send_error_mapping(self, handler, monkeypatch, raised, expected, match):
from yt_dlp.networking._websockets import WebsocketsResponseAdapter
ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url')
with pytest.raises(expected, match=match) as exc_info:
ws.send('test')
assert exc_info.type is expected
@pytest.mark.parametrize('raised,expected,match', [
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
(lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None),
(lambda: RuntimeError(), TransportError, None),
(lambda: TimeoutError(), TransportError, None),
(lambda: socks.ProxyError(), ProxyError, None),
# Catch-all
(lambda: websockets.exceptions.WebSocketException(), TransportError, None),
])
def test_ws_recv_error_mapping(self, handler, monkeypatch, raised, expected, match):
from yt_dlp.networking._websockets import WebsocketsResponseAdapter
ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url')
with pytest.raises(expected, match=match) as exc_info:
ws.recv()
assert exc_info.type is expected

View file

@ -1,34 +0,0 @@
{
"latest": "2013.01.06",
"signature": "72158cdba391628569ffdbea259afbcf279bbe3d8aeb7492690735dc1cfa6afa754f55c61196f3871d429599ab22f2667f1fec98865527b32632e7f4b3675a7ef0f0fbe084d359256ae4bba68f0d33854e531a70754712f244be71d4b92e664302aa99653ee4df19800d955b6c4149cd2b3f24288d6e4b40b16126e01f4c8ce6",
"versions": {
"2013.01.02": {
"bin": [
"http://youtube-dl.org/downloads/2013.01.02/youtube-dl",
"f5b502f8aaa77675c4884938b1e4871ebca2611813a0c0e74f60c0fbd6dcca6b"
],
"exe": [
"http://youtube-dl.org/downloads/2013.01.02/youtube-dl.exe",
"75fa89d2ce297d102ff27675aa9d92545bbc91013f52ec52868c069f4f9f0422"
],
"tar": [
"http://youtube-dl.org/downloads/2013.01.02/youtube-dl-2013.01.02.tar.gz",
"6a66d022ac8e1c13da284036288a133ec8dba003b7bd3a5179d0c0daca8c8196"
]
},
"2013.01.06": {
"bin": [
"http://youtube-dl.org/downloads/2013.01.06/youtube-dl",
"64b6ed8865735c6302e836d4d832577321b4519aa02640dc508580c1ee824049"
],
"exe": [
"http://youtube-dl.org/downloads/2013.01.06/youtube-dl.exe",
"58609baf91e4389d36e3ba586e21dab882daaaee537e4448b1265392ae86ff84"
],
"tar": [
"http://youtube-dl.org/downloads/2013.01.06/youtube-dl-2013.01.06.tar.gz",
"fe77ab20a95d980ed17a659aa67e371fdd4d656d19c4c7950e7b720b0c2f1a86"
]
}
}
}

View file

@ -1 +1 @@
@py -bb -Werror -Xdev "%~dp0yt_dlp\__main__.py" %*
@py -Werror -Xdev "%~dp0yt_dlp\__main__.py" %*

View file

@ -1,2 +1,2 @@
#!/usr/bin/env sh
exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@"
exec "${PYTHON:-python3}" -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@"

View file

@ -40,7 +40,6 @@
NoSupportingHandlers,
RequestError,
SSLError,
_CompatHTTPError,
network_exceptions,
)
from .plugins import directories as plugin_directories
@ -60,7 +59,13 @@
get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, _get_system_deprecation, current_git_head, detect_variant
from .update import (
REPOSITORY,
_get_system_deprecation,
_make_label,
current_git_head,
detect_variant,
)
from .utils import (
DEFAULT_OUTTMPL,
IDENTITY,
@ -158,7 +163,7 @@
clean_proxies,
std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt':
import ctypes
@ -625,13 +630,16 @@ def __init__(self, params=None, auto_init=True):
'Overwriting params from "color" with "no_color"')
self.params['color'] = 'no_color'
term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
no_color = bool(os.getenv('NO_COLOR'))
def process_color_policy(stream):
stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
if policy in ('auto', None):
return term_allow_color and supports_terminal_sequences(stream)
if term_allow_color and supports_terminal_sequences(stream):
return 'no_color' if no_color else True
return False
assert policy in ('always', 'never', 'no_color'), policy
return {'always': True, 'never': False}.get(policy, policy)
@ -1176,6 +1184,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
MATH_FUNCTIONS = {
'+': float.__add__,
'-': float.__sub__,
'*': float.__mul__,
}
# Field is of the form key1.key2...
# where keys (except first) can be string, int, slice or "{field, ...}"
@ -1197,6 +1206,15 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
(?:\|(?P<default>.*?))?
)$''')
def _from_user_input(field):
if field == ':':
return ...
elif ':' in field:
return slice(*map(int_or_none, field.split(':')))
elif int_or_none(field) is not None:
return int(field)
return field
def _traverse_infodict(fields):
fields = [f for x in re.split(r'\.({.+?})\.?', fields)
for f in ([x] if x.startswith('{') else x.split('.'))]
@ -1206,11 +1224,12 @@ def _traverse_infodict(fields):
for i, f in enumerate(fields):
if not f.startswith('{'):
fields[i] = _from_user_input(f)
continue
assert f.endswith('}'), f'No closing brace for {f} in {fields}'
fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
return traverse_obj(info_dict, fields, traverse_string=True)
def get_value(mdict):
# Object traversal
@ -2451,9 +2470,16 @@ def final_selector(ctx):
return selector_function(ctx_copy)
return final_selector
stream = io.BytesIO(format_spec.encode())
# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
# Prefix numbers with random letters to avoid it being classified as a number
# See: https://github.com/yt-dlp/yt-dlp/pulls/8797
# TODO: Implement parser not reliant on tokenize.tokenize
prefix = ''.join(random.choices(string.ascii_letters, k=32))
stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
try:
tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
tokens = list(_remove_unused_ops(
token._replace(string=token.string.replace(prefix, ''))
for token in tokenize.tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
@ -2586,6 +2612,9 @@ def _fill_common_fields(self, info_dict, final=True):
upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
info_dict[date_key] = upload_date.strftime('%Y%m%d')
if not info_dict.get('release_year'):
info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
live_keys = ('is_live', 'was_live')
live_status = info_dict.get('live_status')
if live_status is None:
@ -3544,7 +3573,7 @@ def sanitize_info(info_dict, remove_private_keys=False):
'version': __version__,
'current_git_head': current_git_head(),
'release_git_head': RELEASE_GIT_HEAD,
'repository': REPOSITORY,
'repository': ORIGIN,
})
if remove_private_keys:
@ -3927,8 +3956,8 @@ def get_encoding(stream):
source += '*'
klass = type(self)
write_debug(join_nonempty(
f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
f'{CHANNEL}@{__version__}',
f'{REPOSITORY.rpartition("/")[2]} version',
_make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
'' if source == 'unknown' else f'({source})',
'' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
@ -4052,6 +4081,7 @@ def urlopen(self, req):
return self._request_director.send(req)
except NoSupportingHandlers as e:
for ue in e.unsupported_errors:
# FIXME: This depends on the order of errors.
if not (ue.handler and ue.msg):
continue
if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
@ -4061,6 +4091,15 @@ def urlopen(self, req):
if 'unsupported proxy type: "https"' in ue.msg.lower():
raise RequestError(
'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
elif (
re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
and 'websockets' not in self._request_director.handlers
):
raise RequestError(
'This request requires WebSocket support. '
'Ensure one of the following dependencies are installed: websockets',
cause=ue) from ue
raise
except SSLError as e:
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
@ -4070,8 +4109,6 @@ def urlopen(self, req):
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
'Try using --legacy-server-connect', cause=e) from e
raise
except HTTPError as e: # TODO: Remove in a future release
raise _CompatHTTPError(e) from e
def build_request_director(self, handlers, preferences=None):
logger = _YDLLogger(self)

View file

@ -1,8 +1,8 @@
try:
import contextvars # noqa: F401
except Exception:
raise Exception(
f'You are using an unsupported version of Python. Only Python versions 3.7 and above are supported by yt-dlp') # noqa: F541
import sys
if sys.version_info < (3, 8):
raise ImportError(
f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541
__license__ = 'Public Domain'
@ -12,7 +12,6 @@
import optparse
import os
import re
import sys
import traceback
from .compat import compat_shlex_quote
@ -74,14 +73,16 @@ def _exit(status=0, *args):
def get_urls(urls, batchfile, verbose):
# Batch file verification
"""
@param verbose -1: quiet, 0: normal, 1: verbose
"""
batch_urls = []
if batchfile is not None:
try:
batch_urls = read_batch_urls(
read_stdin('URLs') if batchfile == '-'
read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
if verbose:
if verbose == 1:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except OSError:
_exit(f'ERROR: batch file {batchfile} could not be read')
@ -722,7 +723,7 @@ def get_postprocessors(opts):
def parse_options(argv=None):
"""@returns ParsedOptions(parser, opts, urls, ydl_opts)"""
parser, opts, urls = parseOpts(argv)
urls = get_urls(urls, opts.batchfile, opts.verbose)
urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose)
set_compat_opts(opts)
try:

View file

@ -25,7 +25,7 @@ def get_hidden_imports():
for module in ('websockets', 'requests', 'urllib3'):
yield from collect_submodules(module)
# These are auto-detected, but explicitly add them just in case
yield from ('mutagen', 'brotli', 'certifi')
yield from ('mutagen', 'brotli', 'certifi', 'secretstorage')
hiddenimports = list(get_hidden_imports())

View file

@ -35,6 +35,7 @@
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
@ -70,7 +71,6 @@ def compat_setenv(key, value, env=os.environ):
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client
compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input
compat_integer_types = (int, )
compat_itertools_count = itertools.count
@ -88,7 +88,7 @@ def compat_setenv(key, value, env=os.environ):
compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError
compat_urllib_HTTPError = compat_HTTPError
compat_urllib_parse = urllib.parse
compat_urllib_parse_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_quote = urllib.parse.quote

View file

@ -10,17 +10,3 @@
cache # >= 3.9
except NameError:
cache = lru_cache(maxsize=None)
try:
cached_property # >= 3.8
except NameError:
class cached_property:
def __init__(self, func):
update_wrapper(self, func)
self.func = func
def __get__(self, instance, _):
if instance is None:
return self
setattr(instance, self.func.__name__, self.func(instance))
return getattr(instance, self.func.__name__)

View file

@ -186,7 +186,7 @@ def _firefox_browser_dir():
if sys.platform in ('cygwin', 'win32'):
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox')
return os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
return os.path.expanduser('~/.mozilla/firefox')

View file

@ -369,7 +369,10 @@ def fin_fragments():
return output.getvalue().encode()
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
if len(fragments) == 1:
self.download_and_append_fragments(ctx, fragments, info_dict)
else:
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
else:
return self.download_and_append_fragments(ctx, fragments, info_dict)

View file

@ -6,7 +6,7 @@
from .common import FileDownloader
from .external import FFmpegFD
from ..networking import Request
from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
from ..utils import DownloadError, str_or_none, try_get
class NiconicoDmcFD(FileDownloader):
@ -64,7 +64,6 @@ def real_download(self, filename, info_dict):
ws_url = info_dict['url']
ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin']
cookies = info_dict.get('cookies')
live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high')
dl = FFmpegFD(self.ydl, self.params or {})
@ -76,12 +75,7 @@ def real_download(self, filename, info_dict):
def communicate_ws(reconnect):
if reconnect:
ws = WebSocketsWrapper(ws_url, {
'Cookies': str_or_none(cookies) or '',
'Origin': f'https://{ws_origin_host}',
'Accept': '*/*',
'User-Agent': self.params['http_headers']['User-Agent'],
})
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request')
ws.send(json.dumps({

View file

@ -47,7 +47,7 @@
ACastChannelIE,
)
from .acfun import AcFunVideoIE, AcFunBangumiIE
from .adn import ADNIE
from .adn import ADNIE, ADNSeasonIE
from .adobeconnect import AdobeConnectIE
from .adobetv import (
AdobeTVEmbedIE,
@ -77,16 +77,24 @@
WyborczaPodcastIE,
WyborczaVideoIE,
)
from .airmozilla import AirMozillaIE
from .airtv import AirTVIE
from .aitube import AitubeKZVideoIE
from .aljazeera import AlJazeeraIE
from .allstar import (
AllstarIE,
AllstarProfileIE,
)
from .alphaporno import AlphaPornoIE
from .amara import AmaraIE
from .altcensored import (
AltCensoredIE,
AltCensoredChannelIE,
)
from .alura import (
AluraIE,
AluraCourseIE
)
from .amadeustv import AmadeusTVIE
from .amara import AmaraIE
from .amcnetworks import AMCNetworksIE
from .amazon import (
AmazonStoreIE,
@ -127,8 +135,12 @@
from .arkena import ArkenaIE
from .ard import (
ARDBetaMediathekIE,
ARDMediathekCollectionIE,
ARDIE,
ARDMediathekIE,
)
from .art19 import (
Art19IE,
Art19ShowIE,
)
from .arte import (
ArteTVIE,
@ -137,9 +149,9 @@
ArteTVCategoryIE,
)
from .arnes import ArnesIE
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
from .atresplayer import AtresPlayerIE
from .atscaleconf import AtScaleConfEventIE
from .atttechchannel import ATTTechChannelIE
from .atvat import ATVAtIE
from .audimedia import AudiMediaIE
from .audioboom import AudioBoomIE
@ -212,6 +224,8 @@
BiliBiliBangumiIE,
BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE,
BilibiliCheeseIE,
BilibiliCheeseSeasonIE,
BiliBiliSearchIE,
BilibiliCategoryIE,
BilibiliAudioIE,
@ -233,11 +247,6 @@
BitChuteIE,
BitChuteChannelIE,
)
from .bitwave import (
BitwaveReplayIE,
BitwaveStreamIE,
)
from .biqle import BIQLEIE
from .blackboardcollaborate import BlackboardCollaborateIE
from .bleacherreport import (
BleacherReportIE,
@ -252,10 +261,7 @@
from .box import BoxIE
from .boxcast import BoxCastVideoIE
from .bpb import BpbIE
from .br import (
BRIE,
BRMediathekIE,
)
from .br import BRIE
from .bravotv import BravoTVIE
from .brainpop import (
BrainPOPIE,
@ -265,7 +271,6 @@
BrainPOPFrIE,
BrainPOPIlIE,
)
from .breakcom import BreakIE
from .breitbart import BreitBartIE
from .brightcove import (
BrightcoveLegacyIE,
@ -277,6 +282,7 @@
)
from .businessinsider import BusinessInsiderIE
from .bundesliga import BundesligaIE
from .bundestag import BundestagIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
@ -295,16 +301,11 @@
from .cammodels import CamModelsIE
from .camsoda import CamsodaIE
from .camtasia import CamtasiaEmbedIE
from .camwithher import CamWithHerIE
from .canal1 import Canal1IE
from .canalalpha import CanalAlphaIE
from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE
from .caracoltv import CaracolTvPlayIE
from .carambatv import (
CarambaTVIE,
CarambaTVPageIE,
)
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
@ -343,7 +344,6 @@
from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE
from .channel9 import Channel9IE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE
@ -351,11 +351,10 @@
ChingariIE,
ChingariUserIE,
)
from .chirbit import (
ChirbitIE,
ChirbitProfileIE,
from .chzzk import (
CHZZKLiveIE,
CHZZKVideoIE,
)
from .cinchcast import CinchcastIE
from .cinemax import CinemaxIE
from .cinetecamilano import CinetecaMilanoIE
from .cineverse import (
@ -372,10 +371,9 @@
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .clipsyndicate import ClipsyndicateIE
from .closertotruth import CloserToTruthIE
from .cloudflarestream import CloudflareStreamIE
from .cloudy import CloudyIE
from .cloudycdn import CloudyCDNIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
@ -442,7 +440,6 @@
DacastVODIE,
DacastPlaylistIE,
)
from .daftsex import DaftsexIE
from .dailymail import DailyMailIE
from .dailymotion import (
DailymotionIE,
@ -479,7 +476,6 @@
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
from .dotsub import DotsubIE
from .douyutv import (
DouyuShowIE,
DouyuTVIE,
@ -526,7 +522,6 @@
DubokuPlaylistIE
)
from .dumpert import DumpertIE
from .defense import DefenseGouvFrIE
from .deuxm import (
DeuxMIE,
DeuxMNewsIE
@ -541,6 +536,7 @@
DropoutSeasonIE,
DropoutIE
)
from .duoplay import DuoplayIE
from .dw import (
DWIE,
DWArticleIE,
@ -548,29 +544,23 @@
from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
from .echomsk import EchoMskIE
from .egghead import (
EggheadCourseIE,
EggheadLessonIE,
)
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .einthusan import EinthusanIE
from .eitb import EitbIE
from .elevensports import ElevenSportsIE
from .ellentube import (
EllenTubeIE,
EllenTubeVideoIE,
EllenTubePlaylistIE,
)
from .elementorembed import ElementorEmbedIE
from .elonet import ElonetIE
from .elpais import ElPaisIE
from .eltrecetv import ElTreceTVIE
from .embedly import EmbedlyIE
from .engadget import EngadgetIE
from .epicon import (
EpiconIE,
EpiconSeriesIE,
)
from .epidemicsound import EpidemicSoundIE
from .eplus import EplusIbIE
from .epoch import EpochIE
from .eporner import EpornerIE
@ -579,12 +569,12 @@
EroProfileIE,
EroProfileAlbumIE,
)
from .err import ERRJupiterIE
from .ertgr import (
ERTFlixCodenameIE,
ERTFlixIE,
ERTWebtvEmbedIE,
)
from .escapist import EscapistIE
from .espn import (
ESPNIE,
WatchESPNIE,
@ -592,21 +582,19 @@
FiveThirtyEightIE,
ESPNCricInfoIE,
)
from .esri import EsriVideoIE
from .ettutv import EttuTvIE
from .europa import EuropaIE, EuroParlWebstreamIE
from .europeantour import EuropeanTourIE
from .eurosport import EurosportIE
from .euscreen import EUScreenIE
from .expotv import ExpoTVIE
from .expressen import ExpressenIE
from .extremetube import ExtremeTubeIE
from .eyedotv import EyedoTVIE
from .facebook import (
FacebookIE,
FacebookPluginsVideoIE,
FacebookRedirectURLIE,
FacebookReelIE,
FacebookAdsIE,
)
from .fancode import (
FancodeVodIE,
@ -630,6 +618,10 @@
from .firsttv import FirstTVIE
from .fivetv import FiveTVIE
from .flickr import FlickrIE
from .floatplane import (
FloatplaneIE,
FloatplaneChannelIE,
)
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
@ -639,16 +631,11 @@
PornerBrosIE,
FuxIE,
)
from .fourzerostudio import (
FourZeroStudioArchiveIE,
FourZeroStudioClipIE,
)
from .fox import FOXIE
from .fox9 import (
FOX9IE,
FOX9NewsIE,
)
from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
FoxNewsArticleIE,
@ -681,7 +668,6 @@
)
from .funk import FunkIE
from .funker530 import Funker530IE
from .fusion import FusionIE
from .fuyintv import FuyinTVIE
from .gab import (
GabTVIE,
@ -708,11 +694,14 @@
GeniusIE,
GeniusLyricsIE,
)
from .getcourseru import (
GetCourseRuPlayerIE,
GetCourseRuIE
)
from .gettr import (
GettrIE,
GettrStreamingIE,
)
from .gfycat import GfycatIE
from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE
@ -758,12 +747,10 @@
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hgtv import HGTVComShowIE
from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
from .hitrecord import HitRecordIE
from .hollywoodreporter import (
HollywoodReporterIE,
@ -778,8 +765,6 @@
HotStarSeasonIE,
HotStarSeriesIE,
)
from .howcast import HowcastIE
from .howstuffworks import HowStuffWorksIE
from .hrefli import HrefLiRedirectIE
from .hrfensehen import HRFernsehenIE
from .hrti import (
@ -820,6 +805,7 @@
IHeartRadioIE,
IHeartRadioPodcastIE,
)
from .ilpost import IlPostIE
from .iltalehti import IltalehtiIE
from .imdb import (
ImdbIE,
@ -893,8 +879,13 @@
SangiinIE,
)
from .jeuxvideo import JeuxVideoIE
from .jiosaavn import (
JioSaavnSongIE,
JioSaavnAlbumIE,
)
from .jove import JoveIE
from .joj import JojIE
from .joqrag import JoqrAgIE
from .jstream import JStreamIE
from .jtbc import (
JTBCIE,
@ -907,7 +898,6 @@
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
from .kelbyone import KelbyOneIE
from .khanacademy import (
KhanAcademyIE,
@ -928,6 +918,7 @@
from .kth import KTHIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE
from .kusi import KUSIIE
from .kuwo import (
KuwoIE,
@ -942,17 +933,12 @@
LA7PodcastEpisodeIE,
LA7PodcastIE,
)
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
EHFTVIE,
ITTFIE,
)
from .lastfm import (
LastFMIE,
LastFMPlaylistIE,
LastFMUserIE,
)
from .laxarxames import LaXarxaMesIE
from .lbry import (
LBRYIE,
LBRYChannelIE,
@ -1001,7 +987,6 @@
LinkedInLearningIE,
LinkedInLearningCourseIE,
)
from .linuxacademy import LinuxAcademyIE
from .liputan6 import Liputan6IE
from .listennotes import ListenNotesIE
from .litv import LiTVIE
@ -1022,6 +1007,11 @@
LRTVODIE,
LRTStreamIE
)
from .lsm import (
LSMLREmbedIE,
LSMLTVEmbedIE,
LSMReplayIE
)
from .lumni import (
LumniIE
)
@ -1029,9 +1019,9 @@
LyndaIE,
LyndaCourseIE
)
from .m6 import M6IE
from .maariv import MaarivIE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE
from .magentamusik import MagentaMusikIE
from .mailru import (
MailRuIE,
MailRuMusicIE,
@ -1080,10 +1070,7 @@
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .meta import METAIE
from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .mgtv import MGTVIE
from .miaopai import MiaoPaiIE
from .microsoftstream import MicrosoftStreamIE
@ -1105,7 +1092,6 @@
)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
from .mirrativ import (
MirrativIE,
MirrativUserIE,
@ -1129,13 +1115,7 @@
MLBArticleIE,
)
from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE
from .mocha import MochaVideoIE
from .moevideo import MoeVideoIE
from .mofosex import (
MofosexIE,
MofosexEmbedIE,
)
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
@ -1143,9 +1123,9 @@
MotherlessIE,
MotherlessGroupIE,
MotherlessGalleryIE,
MotherlessUploaderIE,
)
from .motorsport import MotorsportIE
from .movieclips import MovieClipsIE
from .moviepilot import MoviepilotIE
from .moview import MoviewPlayIE
from .moviezine import MoviezineIE
@ -1170,18 +1150,17 @@
MusicdexArtistIE,
MusicdexPlaylistIE,
)
from .mwave import MwaveIE, MwaveMeetGreetIE
from .mx3 import (
Mx3IE,
Mx3NeoIE,
Mx3VolksmusikIE,
)
from .mxplayer import (
MxplayerIE,
MxplayerShowIE,
)
from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE
from .myspass import MySpassIE
from .myvi import (
MyviIE,
MyviEmbedIE,
)
from .myvideoge import MyVideoGeIE
from .myvidster import MyVidsterIE
from .mzaalo import MzaaloIE
@ -1230,6 +1209,7 @@
from .ndtv import NDTVIE
from .nebula import (
NebulaIE,
NebulaClassIE,
NebulaSubscriptionsIE,
NebulaChannelIE,
)
@ -1256,7 +1236,6 @@
NewgroundsUserIE,
)
from .newspicks import NewsPicksIE
from .newstube import NewstubeIE
from .newsy import NewsyIE
from .nextmedia import (
NextMediaIE,
@ -1291,7 +1270,6 @@
NickIE,
NickBrIE,
NickDeIE,
NickNightIE,
NickRuIE,
)
from .niconico import (
@ -1316,17 +1294,15 @@
NiconicoChannelPlusChannelLivesIE,
)
from .ninegag import NineGagIE
from .ninenews import NineNewsIE
from .ninenow import NineNowIE
from .nintendo import NintendoIE
from .nitter import NitterIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .noice import NoicePodcastIE
from .nonktube import NonkTubeIE
from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
from .nosnl import NOSNLArticleIE
from .nova import (
NovaEmbedIE,
@ -1387,7 +1363,10 @@
from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE
from .on24 import On24IE
from .ondemandkorea import OnDemandKoreaIE
from .ondemandkorea import (
OnDemandKoreaIE,
OnDemandKoreaProgramIE,
)
from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE
@ -1398,10 +1377,6 @@
OnetPlIE,
)
from .onionstudios import OnionStudiosIE
from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
)
from .opencast import (
OpencastIE,
OpencastPlaylistIE,
@ -1416,6 +1391,7 @@
ORFTVthekIE,
ORFFM4StoryIE,
ORFRadioIE,
ORFPodcastIE,
ORFIPTVIE,
)
from .outsidetv import OutsideTVIE
@ -1429,7 +1405,6 @@
PalcoMP3ArtistIE,
PalcoMP3VideoIE,
)
from .pandoratv import PandoraTVIE
from .panopto import (
PanoptoIE,
PanoptoListIE,
@ -1457,7 +1432,6 @@
PelotonIE,
PelotonLiveIE
)
from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (
PeriscopeIE,
@ -1489,13 +1463,10 @@
PlatziIE,
PlatziCourseIE,
)
from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE
from .playstuff import PlayStuffIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
from .plutotv import PlutoTVIE
from .pluralsight import (
@ -1527,9 +1498,7 @@
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .pornbox import PornboxIE
from .porncom import PornComIE
from .pornflip import PornFlipIE
from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
PornHubUserIE,
@ -1540,7 +1509,6 @@
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .pornez import PornezIE
from .puhutv import (
PuhuTVIE,
PuhuTVSerieIE,
@ -1578,9 +1546,12 @@
RadioCanadaIE,
RadioCanadaAudioVideoIE,
)
from .radiocomercial import (
RadioComercialIE,
RadioComercialPlaylistIE,
)
from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import (
FranceCultureIE,
RadioFranceIE,
@ -1632,7 +1603,6 @@
RCTIPlusTVIE,
)
from .rds import RDSIE
from .recurbate import RecurbateIE
from .redbee import ParliamentLiveUKIE, RTBFIE
from .redbulltv import (
RedBullTVIE,
@ -1641,6 +1611,7 @@
RedBullIE,
)
from .reddit import RedditIE
from .redge import RedCDNLivxIE
from .redgifs import (
RedGifsIE,
RedGifsSearchIE,
@ -1656,7 +1627,10 @@
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
from .rice import RICEIE
from .rinsefm import (
RinseFMIE,
RinseFMArtistPlaylistIE,
)
from .rmcdecouverte import RMCDecouverteIE
from .rockstargames import RockstarGamesIE
from .rokfin import (
@ -1680,11 +1654,7 @@
RTLLuLiveIE,
RTLLuRadioIE,
)
from .rtl2 import (
RTL2IE,
RTL2YouIE,
RTL2YouSeriesIE,
)
from .rtl2 import RTL2IE
from .rtnews import (
RTNewsIE,
RTDocumentryIE,
@ -1706,16 +1676,15 @@
RTVEInfantilIE,
RTVETelevisionIE,
)
from .rtvnh import RTVNHIE
from .rtvs import RTVSIE
from .rtvslo import RTVSLOIE
from .ruhd import RUHDIE
from .rule34video import Rule34VideoIE
from .rumble import (
RumbleEmbedIE,
RumbleIE,
RumbleChannelIE,
)
from .rudovideo import RudoVideoIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
@ -1758,6 +1727,11 @@
from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE
from .sbscokr import (
SBSCoKrIE,
SBSCoKrAllvodProgramIE,
SBSCoKrProgramsVodIE,
)
from .screen9 import Screen9IE
from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE
@ -1772,6 +1746,7 @@
)
from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
from .sendtonews import SendtoNewsIE
@ -1786,10 +1761,6 @@
ShahidIE,
ShahidShowIE,
)
from .shared import (
SharedIE,
VivoIE,
)
from .sharevideos import ShareVideosEmbedIE
from .sibnet import SibnetEmbedIE
from .shemaroome import ShemarooMeIE
@ -1867,7 +1838,6 @@
SpankBangIE,
SpankBangPlaylistIE,
)
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE
from .spike import (
BellatorIE,
@ -1902,6 +1872,8 @@
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
TheaterComplexTownVODIE,
TheaterComplexTownPPVIE,
)
from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE
@ -1915,7 +1887,6 @@
StoryFireSeriesIE,
)
from .streamable import StreamableIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streamff import StreamFFIE
from .streetvoice import StreetVoiceIE
@ -1935,7 +1906,6 @@
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
from .swrmediathek import SWRMediathekIE
from .syvdk import SYVDKIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
@ -1962,7 +1932,6 @@
ConanClassicIE,
)
from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE
from .ted import (
TedEmbedIE,
TedPlaylistIE,
@ -2004,6 +1973,10 @@
from .testurl import TestURLIE
from .tf1 import TF1IE
from .tfo import TFOIE
from .theguardian import (
TheGuardianPodcastIE,
TheGuardianPodcastPlaylistIE,
)
from .theholetv import TheHoleTvIE
from .theintercept import TheInterceptIE
from .theplatform import (
@ -2014,7 +1987,6 @@
from .thesun import TheSunIE
from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .thisvid import (
ThisVidIE,
@ -2036,7 +2008,6 @@
TikTokLiveIE,
DouyinIE,
)
from .tinypic import TinyPicIE
from .tmz import TMZIE
from .tnaflix import (
TNAFlixNetworkEmbedIE,
@ -2051,10 +2022,6 @@
from .toggo import (
ToggoIE,
)
from .tokentube import (
TokentubeIE,
TokentubeChannelIE
)
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE
@ -2065,7 +2032,6 @@
TrillerUserIE,
TrillerShortIE,
)
from .trilulilu import TriluliluIE
from .trovo import (
TrovoIE,
TrovoVodIE,
@ -2073,6 +2039,7 @@
TrovoChannelClipIE,
)
from .trtcocuk import TrtCocukVideoIE
from .trtworld import TrtWorldIE
from .trueid import TrueIDIE
from .trunews import TruNewsIE
from .truth import TruthIE
@ -2090,8 +2057,6 @@
TuneInPodcastEpisodeIE,
TuneInShortenerIE,
)
from .tunepk import TunePkIE
from .turbo import TurboIE
from .tv2 import (
TV2IE,
TV2ArticleIE,
@ -2132,16 +2097,7 @@
from .tviplayer import TVIPlayerIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
TVNowFilmIE,
TVNowNewIE,
TVNowSeasonIE,
TVNowAnnualIE,
TVNowShowIE,
)
from .tvopengr import (
TVOpenGrWatchIE,
TVOpenGrEmbedIE,
@ -2159,7 +2115,6 @@
)
from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
from .twitcasting import (
@ -2208,7 +2163,6 @@
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
from .unscripted import UnscriptedNewsVideoIE
from .unsupported import KnownDRMIE, KnownPiracyIE
from .uol import UOLIE
from .uplynk import (
@ -2227,7 +2181,6 @@
from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veo import VeoIE
from .veoh import (
VeohIE,
@ -2249,7 +2202,6 @@
ViceArticleIE,
ViceShowIE,
)
from .vidbit import VidbitIE
from .viddler import ViddlerIE
from .videa import VideaIE
from .videocampus_sachsen import (
@ -2277,6 +2229,7 @@
VidioLiveIE
)
from .vidlii import VidLiiIE
from .vidly import VidlyIE
from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
@ -2299,7 +2252,6 @@
VimmIE,
VimmRecordingIE,
)
from .vimple import VimpleIE
from .vine import (
VineIE,
VineUserIE,
@ -2308,6 +2260,7 @@
VikiIE,
VikiChannelIE,
)
from .viously import ViouslyIE
from .viqeo import ViqeoIE
from .viu import (
ViuIE,
@ -2323,10 +2276,8 @@
VKPlayLiveIE,
)
from .vocaroo import VocarooIE
from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE
from .voicerepublic import VoiceRepublicIE
from .voicy import (
VoicyIE,
VoicyChannelIE,
@ -2346,23 +2297,13 @@
KetnetIE,
DagelijkseKostIE,
)
from .vrak import VrakIE
from .vrv import (
VRVIE,
VRVSeriesIE,
)
from .vshare import VShareIE
from .vtm import VTMIE
from .medialaan import MedialaanIE
from .vuclip import VuClipIE
from .vupload import VuploadIE
from .vvvvid import (
VVVVIDIE,
VVVVIDShowIE,
)
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
from .wakanim import WakanimIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
@ -2374,8 +2315,6 @@
WASDTVClipIE,
)
from .wat import WatIE
from .watchbox import WatchBoxIE
from .watchindianporn import WatchIndianPornIE
from .wdr import (
WDRIE,
WDRPageIE,
@ -2409,7 +2348,6 @@
from .weyyak import WeyyakIE
from .whyp import WhypIE
from .wikimedia import WikimediaIE
from .willow import WillowIE
from .wimbledon import WimbledonIE
from .wimtv import WimTVIE
from .whowatch import WhoWatchIE
@ -2443,7 +2381,6 @@
WykopPostCommentIE,
)
from .xanimu import XanimuIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
from .xhamster import (
@ -2459,8 +2396,6 @@
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
from .xtube import XTubeUserIE, XTubeIE
from .xuite import XuiteIE
from .xvideos import (
XVideosIE,
XVideosQuickiesIE
@ -2490,10 +2425,7 @@
YappyIE,
YappyProfileIE,
)
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
from .yle_areena import YleAreenaIE
from .ynet import YnetIE
from .youjizz import YouJizzIE
from .youku import (
YoukuIE,
@ -2569,6 +2501,9 @@
ZingMp3ChartMusicVideoIE,
ZingMp3UserIE,
ZingMp3HubIE,
ZingMp3LiveRadioIE,
ZingMp3PodcastEpisodeIE,
ZingMp3PodcastIE,
)
from .zoom import ZoomIE
from .zype import ZypeIE

View file

@ -16,6 +16,7 @@
try_get,
unescapeHTML,
update_url_query,
url_or_none,
)
@ -379,6 +380,18 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'noplaylist': True,
'skip_download': 'm3u8',
},
}, {
# 'videoEpisodes' is a dict with `items` key
'url': 'https://iview.abc.net.au/show/7-30-mark-humphries-satire',
'info_dict': {
'id': '178458-0',
'title': 'Episodes',
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
'series': '7.30 Mark Humphries Satire',
'season': 'Episodes',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
},
'playlist_count': 15,
}]
def _real_extract(self, url):
@ -398,12 +411,14 @@ def _real_extract(self, url):
series = video_data['selectedSeries']
return {
'_type': 'playlist',
'entries': [self.url_result(episode['shareUrl'])
for episode in series['_embedded']['videoEpisodes']],
'entries': [self.url_result(episode_url, ABCIViewIE)
for episode_url in traverse_obj(series, (
'_embedded', 'videoEpisodes', (None, 'items'), ..., 'shareUrl', {url_or_none}))],
'id': series.get('id'),
'title': dict_get(series, ('title', 'displaySubtitle')),
'description': series.get('description'),
'series': dict_get(series, ('showTitle', 'displayTitle')),
'season': dict_get(series, ('title', 'displaySubtitle')),
'thumbnail': series.get('thumbnail'),
'thumbnail': traverse_obj(
series, 'thumbnail', ('images', lambda _, v: v['name'] == 'seriesThumbnail', 'url'), get_all=False),
}

View file

@ -92,6 +92,8 @@ def abematv_license_open(self, url):
class AbemaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'abematv'
_USERTOKEN = None
_DEVICE_ID = None
_MEDIATOKEN = None
@ -136,11 +138,15 @@ def _get_device_token(self):
if self._USERTOKEN:
return self._USERTOKEN
add_opener(self._downloader, AbemaLicenseHandler(self))
username, _ = self._get_login_info()
AbemaTVBaseIE._USERTOKEN = username and self.cache.load(self._NETRC_MACHINE, username)
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
AbemaTVBaseIE._USERTOKEN = auth_cache and auth_cache.get('usertoken')
if AbemaTVBaseIE._USERTOKEN:
# try authentication with locally stored token
try:
AbemaTVBaseIE._DEVICE_ID = auth_cache.get('device_id')
self._get_media_token(True)
return
except ExtractorError as e:
@ -159,7 +165,6 @@ def _get_device_token(self):
})
AbemaTVBaseIE._USERTOKEN = user_data['token']
add_opener(self._downloader, AbemaLicenseHandler(self))
return self._USERTOKEN
def _get_media_token(self, invalidate=False, to_show=True):
@ -181,6 +186,37 @@ def _get_media_token(self, invalidate=False, to_show=True):
return self._MEDIATOKEN
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
auth_cache = {
'device_id': AbemaTVBaseIE._DEVICE_ID,
'usertoken': AbemaTVBaseIE._USERTOKEN,
}
self.cache.store(self._NETRC_MACHINE, username, auth_cache)
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
return self._download_json(
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
@ -204,14 +240,14 @@ def _extract_breadcrumb_list(self, webpage, video_id):
class AbemaTVIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
_NETRC_MACHINE = 'abematv'
_TESTS = [{
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
'info_dict': {
'id': '194-25_s2_p1',
'title': '第1話 「チーズケーキ」 「モーニング再び」',
'series': '異世界食堂2',
'series_number': 2,
'season': 'シーズン2',
'season_number': 2,
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
'episode_number': 1,
},
@ -252,33 +288,6 @@ class AbemaTVIE(AbemaTVBaseIE):
}]
_TIMETABLE = None
def _perform_login(self, username, password):
self._get_device_token()
if self.cache.load(self._NETRC_MACHINE, username) and self._get_media_token():
self.write_debug('Skipping logging in')
return
if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email'
else:
ep, method = 'oneTimePassword', 'userId'
login_response = self._download_json(
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
'Content-Type': 'application/json',
})
AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True)
self.cache.store(self._NETRC_MACHINE, username, AbemaTVBaseIE._USERTOKEN)
def _real_extract(self, url):
# starting download using infojson from this extractor is undefined behavior,
# and never be fixed in the future; you must trigger downloads by directly specifying URL.
@ -347,12 +356,12 @@ def _real_extract(self, url):
)?
''', r'\1', og_desc)
# canonical URL may contain series and episode number
# canonical URL may contain season and episode number
mobj = re.search(r's(\d+)_p(\d+)$', canonical_url)
if mobj:
seri = int_or_none(mobj.group(1), default=float('inf'))
epis = int_or_none(mobj.group(2), default=float('inf'))
info['series_number'] = seri if seri < 100 else None
info['season_number'] = seri if seri < 100 else None
# some anime like Detective Conan (though not available in AbemaTV)
# has more than 1000 episodes (1026 as of 2021/11/15)
info['episode_number'] = epis if epis < 2000 else None
@ -381,7 +390,7 @@ def _real_extract(self, url):
self.report_warning('This is a premium-only stream')
info.update(traverse_obj(api_response, {
'series': ('series', 'title'),
'season': ('season', 'title'),
'season': ('season', 'name'),
'season_number': ('season', 'sequence'),
'episode_number': ('episode', 'number'),
}))

View file

@ -3,6 +3,7 @@
import json
import os
import random
import time
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
@ -17,17 +18,38 @@
int_or_none,
intlist_to_bytes,
long_to_bytes,
parse_iso8601,
pkcs1pad,
strip_or_none,
str_or_none,
try_get,
unified_strdate,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class ADNIE(InfoExtractor):
class ADNBaseIE(InfoExtractor):
IE_DESC = 'Animation Digital Network'
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = f'https://gw.api.{_BASE}/'
_PLAYER_BASE_URL = f'{_API_BASE_URL}player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
@ -44,29 +66,35 @@ class ADNIE(InfoExtractor):
'season_number': 1,
'episode': 'À ce soir !',
'episode_number': 1,
'thumbnail': str,
'season': 'Season 1',
},
'skip': 'Only available in region (FR, ...)',
'skip': 'Only available in French and German speaking Europe',
}, {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}, {
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
'ext': 'mp4',
'episode_number': 1,
'duration': 1417,
'release_date': '20231004',
'series': 'The Eminence in Shadow',
'season_number': 2,
'episode': str,
'title': str,
'thumbnail': str,
'season': 'Season 2',
'comment_count': int,
'average_rating': float,
'description': str,
},
# 'skip': 'Only available in French and German speaking Europe',
}]
_NETRC_MACHINE = 'animationdigitalnetwork'
_BASE = 'animationdigitalnetwork.fr'
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
_HEADERS = {}
_LOGIN_ERR_MESSAGE = 'Unable to log in'
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
}
_LINE_ALIGN_MAP = {
'middle': 8,
'end': 4,
}
def _get_subtitles(self, sub_url, video_id):
if not sub_url:
return None
@ -116,6 +144,8 @@ def _get_subtitles(self, sub_url, video_id):
if sub_lang == 'vostf':
sub_lang = 'fr'
elif sub_lang == 'vostde':
sub_lang = 'de'
subtitles.setdefault(sub_lang, []).extend([{
'ext': 'json',
'data': json.dumps(sub),
@ -147,7 +177,7 @@ def _perform_login(self, username, password):
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
def _real_extract(self, url):
video_id = self._match_id(url)
lang, video_id = self._match_valid_url(url).group('lang', 'id')
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
player = self._download_json(
video_base_url + 'configuration', video_id,
@ -157,12 +187,15 @@ def _real_extract(self, url):
user = options['user']
if not user.get('hasAccess'):
self.raise_login_required()
start_date = traverse_obj(options, ('video', 'startDate', {str}))
if (parse_iso8601(start_date) or 0) > time.time():
raise ExtractorError(f'This video is not available yet. Release date: {start_date}', expected=True)
self.raise_login_required('This video requires a subscription', method='password')
token = self._download_json(
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
video_id, 'Downloading access token', headers={
'x-player-refresh-token': user['refreshToken']
'X-Player-Refresh-Token': user['refreshToken'],
}, data=b'')['token']
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
@ -184,7 +217,9 @@ def _real_extract(self, url):
try:
links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization
'X-Player-Token': authorization,
'X-Target-Distribution': lang,
**self._HEADERS
}, query={
'freeWithAds': 'true',
'adaptive': 'false',
@ -232,8 +267,14 @@ def _real_extract(self, url):
if format_id == 'vf':
for f in m3u8_formats:
f['language'] = 'fr'
elif format_id == 'vde':
for f in m3u8_formats:
f['language'] = 'de'
formats.extend(m3u8_formats)
if not formats:
self.raise_login_required('This video requires a subscription', method='password')
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
@ -255,3 +296,40 @@ def _real_extract(self, url):
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
'comment_count': int_or_none(video.get('commentsCount')),
}
class ADNSeasonIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
'playlist_count': 12,
'info_dict': {
'id': '911',
'title': 'Tokyo Mew Mew New',
},
# 'skip': 'Only available in French end German speaking Europe',
}]
def _real_extract(self, url):
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
show = self._download_json(
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
'Downloading show JSON metadata', headers=self._HEADERS)['show']
show_id = str(show['id'])
episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={
'X-Target-Distribution': lang,
**self._HEADERS
}, query={
'order': 'asc',
'limit': '-1',
})
def entries():
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
yield self.url_result(
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
ADNIE, episode_id)
return self.playlist_result(entries(), show_id, show.get('title'))

View file

@ -93,7 +93,7 @@ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating'])
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._extract_aen_smil(media_url, video_id, auth))
@ -121,11 +121,21 @@ class AENetworksIE(AENetworksBaseIE):
'info_dict': {
'id': '22253814',
'ext': 'mp4',
'title': 'Winter is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'title': 'Winter Is Coming',
'description': 'md5:a40e370925074260b1c8a633c632c63a',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
'duration': 2592.0,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:5',
'tags': 'count:14',
'categories': ['Mountain Men'],
'episode_number': 1,
'episode': 'Episode 1',
'season': 'Season 1',
'season_number': 1,
'series': 'Mountain Men',
},
'params': {
# m3u8 download
@ -143,6 +153,15 @@ class AENetworksIE(AENetworksBaseIE):
'timestamp': 1452634428,
'upload_date': '20160112',
'uploader': 'AENE-NEW',
'duration': 1277.695,
'thumbnail': r're:^https?://.*\.jpe?g$',
'chapters': 'count:4',
'tags': 'count:23',
'episode': 'Episode 1',
'episode_number': 1,
'season': 'Season 9',
'season_number': 9,
'series': 'Duck Dynasty',
},
'params': {
# m3u8 download

View file

@ -1,63 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
parse_iso8601,
)
class AirMozillaIE(InfoExtractor):
_VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
_TEST = {
'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
'md5': '8d02f53ee39cf006009180e21df1f3ba',
'info_dict': {
'id': '6x4q2w',
'ext': 'mp4',
'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
'thumbnail': r're:https?://.*/poster\.jpg',
'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
'timestamp': 1422487800,
'upload_date': '20150128',
'location': 'SFO Commons',
'duration': 3780,
'view_count': int,
'categories': ['Main', 'Privacy'],
}
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id')
embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id)
jwconfig = self._parse_json(self._search_regex(
r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config']
info_dict = self._parse_jwplayer_data(jwconfig, video_id)
view_count = int_or_none(self._html_search_regex(
r'Views since archived: ([0-9]+)',
webpage, 'view count', fatal=False))
timestamp = parse_iso8601(self._html_search_regex(
r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False))
duration = parse_duration(self._search_regex(
r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
webpage, 'duration', fatal=False))
info_dict.update({
'id': video_id,
'title': self._og_search_title(webpage),
'url': self._og_search_url(webpage),
'display_id': display_id,
'description': self._og_search_description(webpage),
'timestamp': timestamp,
'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None),
'duration': duration,
'view_count': view_count,
'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage),
})
return info_dict

253
yt_dlp/extractor/allstar.py Normal file
View file

@ -0,0 +1,253 @@
import functools
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
int_or_none,
join_nonempty,
parse_qs,
urljoin,
)
from ..utils.traversal import traverse_obj
_FIELDS = '''
_id
clipImageSource
clipImageThumb
clipLink
clipTitle
createdDate
shareId
user { _id }
username
views'''
_EXTRA_FIELDS = '''
clipLength
clipSizeBytes'''
_QUERIES = {
'clip': '''query ($id: String!) {
video: getClip(clipIdentifier: $id) {
%s %s
}
}''' % (_FIELDS, _EXTRA_FIELDS),
'montage': '''query ($id: String!) {
video: getMontage(clipIdentifier: $id) {
%s
}
}''' % _FIELDS,
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
'Montages': '''query ($page: Int!, $user: String!) {
videos: montages(search: createdDate, page: $page, user: $user) {
data { %s }
}
}''' % _FIELDS,
'Mobile Clips': '''query ($page: Int!, $user: String!) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}
class AllstarBaseIE(InfoExtractor):
@staticmethod
def _parse_video_data(video_data):
def media_url_or_none(path):
return urljoin('https://media.allstar.gg/', path)
info = traverse_obj(video_data, {
'id': ('_id', {str}),
'display_id': ('shareId', {str}),
'title': ('clipTitle', {str}),
'url': ('clipLink', {media_url_or_none}),
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
'duration': ('clipLength', {int_or_none}),
'filesize': ('clipSizeBytes', {int_or_none}),
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
'uploader': ('username', {str}),
'uploader_id': ('user', '_id', {str}),
'view_count': ('views', {int_or_none}),
})
if info.get('id') and info.get('url'):
basename = 'clip' if '/clips/' in info['url'] else 'montage'
info['webpage_url'] = f'https://allstar.gg/{basename}?{basename}={info["id"]}'
info.update({
'extractor_key': AllstarIE.ie_key(),
'extractor': AllstarIE.IE_NAME,
'uploader_url': urljoin('https://allstar.gg/u/', info.get('uploader_id')),
})
return info
def _call_api(self, query, variables, path, video_id=None, note=None):
response = self._download_json(
'https://a1.allstar.gg/graphql', video_id, note=note,
headers={'content-type': 'application/json'},
data=json.dumps({'variables': variables, 'query': query}).encode())
errors = traverse_obj(response, ('errors', ..., 'message', {str}))
if errors:
raise ExtractorError('; '.join(errors))
return traverse_obj(response, path)
class AllstarIE(AllstarBaseIE):
_VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?P<type>(?:clip|montage))\?(?P=type)=(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://allstar.gg/clip?clip=64482c2da9eec30008a67d1b',
'info_dict': {
'id': '64482c2da9eec30008a67d1b',
'title': '4K on Inferno',
'url': 'md5:66befb5381eef0c9456026386c25fa55',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'uploader': 'chrk.',
'ext': 'mp4',
'duration': 20,
'filesize': 21199257,
'timestamp': 1682451501,
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230425',
'view_count': int,
}
}, {
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
'info_dict': {
'id': '64a1ec6b887f4c0008dc50b8',
'display_id': '8LJLY4JKB',
'title': 'AK-47 3K on Mirage',
'url': 'md5:dde224fd12f035c0e2529a4ae34c4283',
'ext': 'mp4',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'duration': 16,
'filesize': 30175859,
'timestamp': 1688333419,
'uploader': 'cherokee',
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230702',
'view_count': int,
}
}, {
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
'info_dict': {
'id': '643e64089da7e9363e1fa66c',
'display_id': 'APQLGM2IMXW',
'title': 'cherokee Rapid Fire Snipers Montage',
'url': 'md5:a3ee356022115db2b27c81321d195945',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'ext': 'mp4',
'timestamp': 1681810448,
'uploader': 'cherokee',
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230418',
'view_count': int,
}
}, {
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
'info_dict': {
'id': '64a2697372ce3703de29e868',
'display_id': 'RILJMH6QOS',
'title': 'cherokee Rapid Fire Snipers Montage',
'url': 'md5:d5672e6f88579730c2310a80fdbc4030',
'thumbnail': r're:https://media\.allstar\.gg/.+\.(?:png|jpg)$',
'ext': 'mp4',
'timestamp': 1688365434,
'uploader': 'cherokee',
'uploader_id': '62b8bdfc9021052f7905882d',
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230703',
'view_count': int,
}
}]
def _real_extract(self, url):
query_id, video_id = self._match_valid_url(url).group('type', 'id')
return self._parse_video_data(
self._call_api(
_QUERIES.get(query_id), {'id': video_id}, ('data', 'video'), video_id))
class AllstarProfileIE(AllstarBaseIE):
_VALID_URL = r'https?://(?:www\.)?allstar\.gg/(?:profile\?user=|u/)(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://allstar.gg/profile?user=62b8bdfc9021052f7905882d',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-clips',
'title': 'cherokee - Clips',
},
'playlist_mincount': 15
}, {
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-clips-730',
'title': 'cherokee - Clips - 730',
},
'playlist_mincount': 15
}, {
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-montages',
'title': 'cherokee - Montages',
},
'playlist_mincount': 4
}, {
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-mobile',
'title': 'cherokee - Mobile Clips',
},
'playlist_mincount': 1
}]
_PAGE_SIZE = 10
def _get_page(self, user_id, display_id, game, query, page_num):
page_num += 1
for video_data in self._call_api(
query, {
'user': user_id,
'page': page_num,
'game': game,
}, ('data', 'videos', 'data'), display_id, f'Downloading page {page_num}'):
yield self._parse_video_data(video_data)
def _real_extract(self, url):
display_id = self._match_id(url)
profile_data = self._download_json(
urljoin('https://api.allstar.gg/v1/users/profile/', display_id), display_id)
user_id = traverse_obj(profile_data, ('data', ('_id'), {str}))
if not user_id:
raise ExtractorError('Unable to extract the user id')
username = traverse_obj(profile_data, ('data', 'profile', ('username'), {str}))
url_query = parse_qs(url)
game = traverse_obj(url_query, ('game', 0, {int_or_none}))
query_id = traverse_obj(url_query, ('view', 0), default='Clips')
if query_id not in ('Clips', 'Montages', 'Mobile Clips'):
raise ExtractorError(f'Unsupported playlist URL type {query_id!r}')
return self.playlist_result(
OnDemandPagedList(
functools.partial(
self._get_page, user_id, display_id, game, _QUERIES.get(query_id)), self._PAGE_SIZE),
playlist_id=join_nonempty(user_id, query_id.lower().split()[0], game),
playlist_title=join_nonempty((username or display_id), query_id, game, delim=' - '))

View file

@ -0,0 +1,96 @@
import re
from .archiveorg import ArchiveOrgIE
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
int_or_none,
orderedSet,
str_to_int,
urljoin,
)
class AltCensoredIE(InfoExtractor):
IE_NAME = 'altcensored'
_VALID_URL = r'https?://(?:www\.)?altcensored\.com/(?:watch\?v=|embed/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.altcensored.com/watch?v=k0srjLSkga8',
'info_dict': {
'id': 'youtube-k0srjLSkga8',
'ext': 'webm',
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
'display_id': 'k0srjLSkga8.webm',
'release_date': '20180403',
'creator': 'Virginie Vota',
'release_year': 2018,
'upload_date': '20230318',
'uploader': 'admin@altcensored.com',
'description': 'md5:0b38a8fc04103579d5c1db10a247dc30',
'timestamp': 1679161343,
'track': 'k0srjLSkga8',
'duration': 926.09,
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int,
'categories': ['News & Politics'],
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
return {
'_type': 'url_transparent',
'url': f'https://archive.org/details/youtube-{video_id}',
'ie_key': ArchiveOrgIE.ie_key(),
'view_count': str_to_int(self._html_search_regex(
r'YouTube Views:(?:\s|&nbsp;)*([\d,]+)', webpage, 'view count', default=None)),
'categories': self._html_search_regex(
r'<a href="/category/\d+">\s*\n?\s*([^<]+)</a>',
webpage, 'category', default='').split() or None,
}
class AltCensoredChannelIE(InfoExtractor):
IE_NAME = 'altcensored:channel'
_VALID_URL = r'https?://(?:www\.)?altcensored\.com/channel/(?!page|table)(?P<id>[^/?#]+)'
_PAGE_SIZE = 24
_TESTS = [{
'url': 'https://www.altcensored.com/channel/UCFPTO55xxHqFqkzRZHu4kcw',
'info_dict': {
'title': 'Virginie Vota',
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
},
'playlist_count': 91
}, {
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
'info_dict': {
'title': 'yukikaze775',
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
},
'playlist_count': 4
}]
def _real_extract(self, url):
channel_id = self._match_id(url)
webpage = self._download_webpage(
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
page_count = int_or_none(self._html_search_regex(
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>',
webpage, 'page count', default='1'))
def page_func(page_num):
page_num += 1
webpage = self._download_webpage(
f'https://altcensored.com/channel/{channel_id}/page/{page_num}',
channel_id, note=f'Downloading page {page_num}')
items = re.findall(r'<a[^>]+href="(/watch\?v=[^"]+)', webpage)
return [self.url_result(urljoin('https://www.altcensored.com', path), AltCensoredIE)
for path in orderedSet(items)]
return self.playlist_result(
InAdvancePagedList(page_func, page_count, self._PAGE_SIZE),
playlist_id=channel_id, playlist_title=title)

View file

@ -0,0 +1,77 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AmadeusTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?amadeus\.tv/library/(?P<id>[\da-f]+)'
_TESTS = [{
'url': 'http://www.amadeus.tv/library/65091a87ff85af59d9fc54c3',
'info_dict': {
'id': '5576678021301411311',
'ext': 'mp4',
'title': 'Jieon Park - 第五届珠海莫扎特国际青少年音乐周小提琴C组第三轮',
'thumbnail': 'http://1253584441.vod2.myqcloud.com/a0046a27vodtransbj1253584441/7db4af535576678021301411311/coverBySnapshot_10_0.jpg',
'duration': 1264.8,
'upload_date': '20230918',
'timestamp': 1695034800,
'display_id': '65091a87ff85af59d9fc54c3',
'view_count': int,
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
}
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
nuxt_data = self._search_nuxt_data(webpage, display_id, traverse=('fetch', '0'))
video_id = traverse_obj(nuxt_data, ('item', 'video', {str}))
if not video_id:
raise ExtractorError('Unable to extract actual video ID')
video_data = self._download_json(
f'http://playvideo.qcloud.com/getplayinfo/v2/1253584441/{video_id}',
video_id, headers={'Referer': 'http://www.amadeus.tv/'})
formats = []
for video in traverse_obj(video_data, ('videoInfo', ('sourceVideo', ('transcodeList', ...)), {dict})):
if not url_or_none(video.get('url')):
continue
formats.append({
**traverse_obj(video, {
'url': 'url',
'format_id': ('definition', {lambda x: f'http-{x or "0"}'}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'filesize': (('totalSize', 'size'), {int_or_none}),
'vcodec': ('videoStreamList', 0, 'codec'),
'acodec': ('audioStreamList', 0, 'codec'),
'fps': ('videoStreamList', 0, 'fps', {float_or_none}),
}, get_all=False),
'http_headers': {'Referer': 'http://www.amadeus.tv/'},
})
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
**traverse_obj(video_data, {
'title': ('videoInfo', 'basicInfo', 'name', {str}),
'thumbnail': ('coverInfo', 'coverUrl', {url_or_none}),
'duration': ('videoInfo', 'sourceVideo', ('floatDuration', 'duration'), {float_or_none}),
}, get_all=False),
**traverse_obj(nuxt_data, ('item', {
'title': (('title', 'title_en', 'title_cn'), {str}),
'description': (('description', 'description_en', 'description_cn'), {str}),
'timestamp': ('date', {parse_iso8601}),
'view_count': ('view', {int_or_none}),
}), get_all=False),
}

View file

@ -10,6 +10,7 @@
class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
_WORKING = False
IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'

View file

@ -52,7 +52,6 @@ class ArchiveOrgIE(InfoExtractor):
'creator': 'SRI International',
'uploader': 'laura@archive.org',
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'release_year': 1968,
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
'track': 'XD300-23 68HighlightsAResearchCntAugHumanIntellect',
@ -134,7 +133,6 @@ class ArchiveOrgIE(InfoExtractor):
'album': '1977-05-08 - Barton Hall - Cornell University',
'release_date': '19770508',
'display_id': 'gd1977-05-08d01t07.flac',
'release_year': 1977,
'track_number': 7,
},
}, {

View file

@ -1,24 +1,25 @@
import json
import re
from functools import partial
from .common import InfoExtractor
from .generic import GenericIE
from ..utils import (
OnDemandPagedList,
bug_reports_message,
determine_ext,
ExtractorError,
int_or_none,
join_nonempty,
jwt_decode_hs256,
make_archive_id,
parse_duration,
qualities,
parse_iso8601,
remove_start,
str_or_none,
try_get,
unified_strdate,
unified_timestamp,
update_url,
update_url_query,
url_or_none,
xpath_text,
)
from ..compat import compat_etree_fromstring
from ..utils.traversal import traverse_obj
class ARDMediathekBaseIE(InfoExtractor):
@ -61,45 +62,6 @@ def _parse_media_info(self, media_info, video_id, fsk):
'subtitles': subtitles,
}
def _ARD_extract_episode_info(self, title):
"""Try to extract season/episode data from the title."""
res = {}
if not title:
return res
for pattern in [
# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
# E.g.: title="Fritjof aus Norwegen (2) (AD)"
# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
# E.g.: title="Folge 25/42: Symmetrie"
# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
# E.g.: title="Folge 1063 - Vertrauen"
# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
]:
m = re.match(pattern, title)
if m:
groupdict = m.groupdict()
res['season_number'] = int_or_none(groupdict.get('season_number'))
res['episode_number'] = int_or_none(groupdict.get('episode_number'))
res['episode'] = str_or_none(groupdict.get('episode'))
# Build the episode title by removing numeric episode information:
if groupdict.get('ep_info') and not res['episode']:
res['episode'] = str_or_none(
title.replace(groupdict.get('ep_info'), ''))
if res['episode']:
res['episode'] = res['episode'].strip()
break
# As a fallback use the whole title as the episode name:
if not res.get('episode'):
res['episode'] = title.strip()
return res
def _extract_formats(self, media_info, video_id):
type_ = media_info.get('_type')
media_array = media_info.get('_mediaArray', [])
@ -155,144 +117,12 @@ def _extract_formats(self, media_info, video_id):
return formats
class ARDMediathekIE(ARDMediathekBaseIE):
IE_NAME = 'ARD:mediathek'
_VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
_TESTS = [{
# available till 26.07.2022
'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
'info_dict': {
'id': '44726822',
'ext': 'mp4',
'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
'duration': 1740,
},
'params': {
# m3u8 download
'skip_download': True,
}
}, {
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
'only_matching': True,
}, {
# audio
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
'only_matching': True,
}, {
'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
'only_matching': True,
}, {
# audio
'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
'only_matching': True,
}, {
'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
def _real_extract(self, url):
# determine video id from url
m = self._match_valid_url(url)
document_id = None
numid = re.search(r'documentId=([0-9]+)', url)
if numid:
document_id = video_id = numid.group(1)
else:
video_id = m.group('video_id')
webpage = self._download_webpage(url, video_id)
ERRORS = (
('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
'Video %s is no longer available'),
)
for pattern, message in ERRORS:
if pattern in webpage:
raise ExtractorError(message % video_id, expected=True)
if re.search(r'[\?&]rss($|[=&])', url):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return GenericIE()._extract_rss(url, video_id, doc)
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>',
r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
'description', webpage, 'meta description', default=None)
if description is None:
description = self._html_search_regex(
r'<p\s+class="teasertext">(.+?)</p>',
webpage, 'teaser text', default=None)
# Thumbnail is sometimes not present.
# It is in the mobile version, but that seems to use a different URL
# structure altogether.
thumbnail = self._og_search_thumbnail(webpage, default=None)
media_streams = re.findall(r'''(?x)
mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
"([^"]+)"''', webpage)
if media_streams:
QUALITIES = qualities(['lo', 'hi', 'hq'])
formats = []
for furl in set(media_streams):
if furl.endswith('.f4m'):
fid = 'f4m'
else:
fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
fid = fid_m.group(1) if fid_m else None
formats.append({
'quality': QUALITIES(fid),
'format_id': fid,
'url': furl,
})
info = {
'formats': formats,
}
else: # request JSON file
if not document_id:
video_id = self._search_regex(
(r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
webpage, 'media id', default=None)
info = self._extract_media_info(
'http://www.ardmediathek.de/play/media/%s' % video_id,
webpage, video_id)
info.update({
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
})
info.update(self._ARD_extract_episode_info(info['title']))
return info
class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
_TESTS = [{
# available till 7.12.2023
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
'md5': 'a438f671e87a7eba04000336a119ccc4',
'md5': '94812e6438488fb923c361a44469614b',
'info_dict': {
'id': 'maischberger-video-424',
'display_id': 'maischberger-video-424',
@ -399,31 +229,36 @@ def _real_extract(self, url):
}
class ARDBetaMediathekIE(ARDMediathekBaseIE):
class ARDBetaMediathekIE(InfoExtractor):
IE_NAME = 'ARDMediathek'
_VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|sammlung))/
(?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
(?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
(?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
(?:[^/]+/)?
(?:player|live|video)/
(?:[^?#]+/)?
(?P<id>[a-zA-Z0-9]+)
/?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE']
_TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'
_TESTS = [{
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy8xZGY0ZGJmZS00ZWQwLTRmMGItYjhhYy0wOGQ4ZmYxNjVhZDI',
'md5': '3fd5fead7a370a819341129c8d713136',
'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
'info_dict': {
'display_id': 'filme-im-mdr/wolfsland-die-traurigen-schwestern/mdr-fernsehen',
'id': '12172961',
'title': 'Wolfsland - Die traurigen Schwestern',
'description': r're:^Als der Polizeiobermeister Raaben',
'duration': 5241,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:efa186f7b0054957',
'timestamp': 1670710500,
'upload_date': '20221210',
'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
'id': '12939099',
'title': 'Liebe auf vier Pfoten',
'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
'duration': 5222,
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
'timestamp': 1701343800,
'upload_date': '20231130',
'ext': 'mp4',
'age_limit': 12,
'episode': 'Wolfsland - Die traurigen Schwestern',
'series': 'Filme im MDR'
'episode': 'Liebe auf vier Pfoten',
'series': 'Filme im MDR',
'age_limit': 0,
'channel': 'MDR',
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
},
}, {
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@ -450,11 +285,31 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'timestamp': 1636398000,
'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
'upload_date': '20211108',
'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste',
'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
'duration': 915,
'episode': 'tagesschau, 20:00 Uhr',
'series': 'tagesschau',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
'channel': 'ARD-Aktuell',
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
},
}, {
'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'md5': 'c428b9effff18ff624d4f903bda26315',
'info_dict': {
'id': '94834686',
'ext': 'mp4',
'duration': 2700,
'episode': '7 Tage ... unter harten Jungs',
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
'upload_date': '20231005',
'timestamp': 1696491171,
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'series': '7 Tage ...',
'channel': 'HR',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
'title': '7 Tage ... unter harten Jungs',
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
},
}, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
@ -471,203 +326,254 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
}, {
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
'only_matching': True,
}]
def _extract_episode_info(self, title):
patterns = [
# Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
# from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
# E.g.: title="Fritjof aus Norwegen (2) (AD)"
# from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
# E.g.: title="Folge 25/42: Symmetrie"
# from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
# E.g.: title="Folge 1063 - Vertrauen"
# from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
# As a fallback use the full title
r'(?P<title>.*)',
]
return traverse_obj(patterns, (..., {partial(re.match, string=title)}, {
'season_number': ('season_number', {int_or_none}),
'episode_number': ('episode_number', {int_or_none}),
'episode': ((
('episode', {str_or_none}),
('ep_info', {lambda x: title.replace(x, '')}),
('title', {str}),
), {str.strip}),
}), get_all=False)
def _real_extract(self, url):
display_id = self._match_id(url)
query = {'embedded': 'false', 'mcV6': 'true'}
headers = {}
if self._get_cookies(self._TOKEN_URL).get('ams'):
token = self._download_json(
self._TOKEN_URL, display_id, 'Fetching token for age verification',
'Unable to fetch age verification token', fatal=False)
id_token = traverse_obj(token, ('idToken', {str}))
decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
if not user_id:
self.report_warning('Unable to extract token, continuing without authentication')
else:
headers['x-authorization'] = f'Bearer {id_token}'
query['userId'] = user_id
if decoded_token.get('age_rating') != 18:
self.report_warning('Account is not verified as 18+; video may be unavailable')
page_data = self._download_json(
f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
display_id, query=query, headers=headers)
# For user convenience we use the old contentId instead of the longer crid
# Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
if old_id is not None:
video_id = str(old_id)
archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
else:
self.report_warning(f'Could not extract contentId{bug_reports_message()}')
video_id = display_id
archive_ids = None
player_data = traverse_obj(
page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False)
is_live = player_data.get('type') == 'player_live'
media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))
if player_data.get('blockedByFsk'):
self.raise_login_required('This video is only available for age verified users or after 22:00')
formats = []
subtitles = {}
for stream in traverse_obj(media_data, ('streams', ..., {dict})):
kind = stream.get('kind')
# Prioritize main stream over sign language and others
preference = 1 if kind == 'main' else None
for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
media_url = media['url']
audio_kind = traverse_obj(media, (
'audios', 0, 'kind', {str}), default='').replace('standard', '')
lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
lang = join_nonempty(lang_code, audio_kind)
language_preference = 10 if lang == 'deu' else -10
if determine_ext(media_url) == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
for f in fmts:
f['language'] = lang
f['language_preference'] = language_preference
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'url': media_url,
'format_id': f'http-{kind}',
'preference': preference,
'language': lang,
'language_preference': language_preference,
**traverse_obj(media, {
'format_note': ('forcedLabel', {str}),
'width': ('maxHResolutionPx', {int_or_none}),
'height': ('maxVResolutionPx', {int_or_none}),
'vcodec': ('videoCodec', {str}),
}),
})
for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
'url': sources['url'],
'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
})
age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
'age_limit': age_limit,
**traverse_obj(media_data, ('meta', {
'title': 'title',
'description': 'synopsis',
'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
'series': 'seriesTitle',
'thumbnail': ('images', 0, 'url', {url_or_none}),
'duration': ('durationSeconds', {int_or_none}),
'channel': 'clipSourceName',
})),
**self._extract_episode_info(page_data.get('title')),
'_old_archive_ids': archive_ids,
}
class ARDMediathekCollectionIE(InfoExtractor):
_VALID_URL = r'''(?x)https://
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/?#]+/)?
(?P<playlist>sendung|serie|sammlung)/
(?:(?P<display_id>[^?#]+?)/)?
(?P<id>[a-zA-Z0-9]+)
(?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
_GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
'info_dict': {
'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
'display_id': 'quiz/staffel-1-originalversion',
'title': 'Staffel 1 Originalversion',
},
'playlist_count': 3,
}, {
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
'title': 'Staffel 4 mit Audiodeskription',
},
'playlist_count': 12,
}, {
'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
'display_id': 'babylon-berlin/staffel-1',
'title': 'Staffel 1',
},
'playlist_count': 8,
}, {
'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
'info_dict': {
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
'display_id': 'tatort',
'title': 'Tatort',
},
'playlist_mincount': 500,
}, {
'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
'info_dict': {
'id': '5eOHzt8XB2sqeFXbIoJlg2',
'display_id': 'die-kirche-bleibt-im-dorf',
'title': 'Die Kirche bleibt im Dorf',
'description': 'Die Kirche bleibt im Dorf',
},
'playlist_count': 4,
}, {
# playlist of type 'sendung'
'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
'only_matching': True,
}, {
# playlist of type 'serie'
'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
'only_matching': True,
}, {
# playlist of type 'sammlung'
'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
'only_matching': True,
}, {
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
'only_matching': True,
}]
def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber):
""" Query the ARD server for playlist information
and returns the data in "raw" format """
if mode == 'sendung':
graphQL = json.dumps({
'query': '''{
showPage(
client: "%s"
showId: "%s"
pageNumber: %d
) {
pagination {
pageSize
totalElements
}
teasers { # Array
mediumTitle
links { target { id href title } }
type
}
}}''' % (client, playlist_id, pageNumber),
}).encode()
else: # mode == 'sammlung'
graphQL = json.dumps({
'query': '''{
morePage(
client: "%s"
compilationId: "%s"
pageNumber: %d
) {
widget {
pagination {
pageSize
totalElements
}
teasers { # Array
mediumTitle
links { target { id href title } }
type
}
}
}}''' % (client, playlist_id, pageNumber),
}).encode()
# Ressources for ARD graphQL debugging:
# https://api-test.ardmediathek.de/public-gateway
show_page = self._download_json(
'https://api.ardmediathek.de/public-gateway',
'[Playlist] %s' % display_id,
data=graphQL,
headers={'Content-Type': 'application/json'})['data']
# align the structure of the returned data:
if mode == 'sendung':
show_page = show_page['showPage']
else: # mode == 'sammlung'
show_page = show_page['morePage']['widget']
return show_page
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
""" Collects all playlist entries and returns them as info dict.
Supports playlists of mode 'sendung' and 'sammlung', and also nested
playlists. """
entries = []
pageNumber = 0
while True: # iterate by pageNumber
show_page = self._ARD_load_playlist_snipped(
playlist_id, display_id, client, mode, pageNumber)
for teaser in show_page['teasers']: # process playlist items
if '/compilation/' in teaser['links']['target']['href']:
# alternativ cond.: teaser['type'] == "compilation"
# => This is an nested compilation, e.g. like:
# https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
link_mode = 'sammlung'
else:
link_mode = 'video'
item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
client, link_mode, display_id,
# perform HTLM quoting of episode title similar to ARD:
re.sub('^-|-$', '', # remove '-' from begin/end
re.sub('[^a-zA-Z0-9]+', '-', # replace special chars by -
teaser['links']['target']['title'].lower()
.replace('ä', 'ae').replace('ö', 'oe')
.replace('ü', 'ue').replace('ß', 'ss'))),
teaser['links']['target']['id'])
entries.append(self.url_result(
item_url,
ie=ARDBetaMediathekIE.ie_key()))
if (show_page['pagination']['pageSize'] * (pageNumber + 1)
>= show_page['pagination']['totalElements']):
# we've processed enough pages to get all playlist entries
break
pageNumber = pageNumber + 1
return self.playlist_result(entries, playlist_id, playlist_title=display_id)
_PAGE_SIZE = 100
def _real_extract(self, url):
video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
'id', 'display_id', 'playlist', 'client', 'season')
display_id, client = display_id or video_id, client or 'ard'
playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
'id', 'display_id', 'playlist', 'season', 'version')
if playlist_type:
# TODO: Extract only specified season
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
def call_api(page_num):
api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
return self._download_json(
f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
f'Downloading playlist page {page_num}', query={
'pageNumber': page_num,
'pageSize': self._PAGE_SIZE,
**({
'seasoned': 'true',
'seasonNumber': season_number,
'withOriginalversion': 'true' if version == 'OV' else 'false',
'withAudiodescription': 'true' if version == 'AD' else 'false',
} if season_number else {}),
})
player_page = self._download_json(
'https://api.ardmediathek.de/public-gateway',
display_id, data=json.dumps({
'query': '''{
playerPage(client:"%s", clipId: "%s") {
blockedByFsk
broadcastedOn
maturityContentRating
mediaCollection {
_duration
_geoblocked
_isLive
_mediaArray {
_mediaStreamArray {
_quality
_server
_stream
}
}
_previewImage
_subtitleUrl
_type
}
show {
title
}
image {
src
}
synopsis
title
tracking {
atiCustomVars {
contentId
}
}
}
}''' % (client, video_id),
}).encode(), headers={
'Content-Type': 'application/json'
})['data']['playerPage']
title = player_page['title']
content_id = str_or_none(try_get(
player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
media_collection = player_page.get('mediaCollection') or {}
if not media_collection and content_id:
media_collection = self._download_json(
'https://www.ardmediathek.de/play/media/' + content_id,
content_id, fatal=False) or {}
info = self._parse_media_info(
media_collection, content_id or video_id,
player_page.get('blockedByFsk'))
age_limit = None
description = player_page.get('synopsis')
maturity_content_rating = player_page.get('maturityContentRating')
if maturity_content_rating:
age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
if not age_limit and description:
age_limit = int_or_none(self._search_regex(
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
info.update({
'age_limit': age_limit,
'display_id': display_id,
'title': title,
'description': description,
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
'series': try_get(player_page, lambda x: x['show']['title']),
'thumbnail': (media_collection.get('_previewImage')
or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
or self.get_thumbnail_from_html(display_id, url)),
})
info.update(self._ARD_extract_episode_info(info['title']))
return info
def fetch_page(page_num):
for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})):
item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
if not item_id or item_id == playlist_id:
continue
item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
yield self.url_result(
f'https://www.ardmediathek.de/{item_mode}/{item_id}',
ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
**traverse_obj(item, {
'id': ('id', {str}),
'title': ('longTitle', {str}),
'duration': ('duration', {int_or_none}),
'timestamp': ('broadcastedOn', {parse_iso8601}),
}))
def get_thumbnail_from_html(self, display_id, url):
webpage = self._download_webpage(url, display_id, fatal=False) or ''
return (
self._og_search_thumbnail(webpage, default=None)
or self._html_search_meta('thumbnailUrl', webpage, default=None))
page_data = call_api(0)
full_id = join_nonempty(playlist_id, season_number, version, delim='_')
return self.playlist_result(
OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
title=page_data.get('title'), description=page_data.get('synopsis'))

303
yt_dlp/extractor/art19.py Normal file
View file

@ -0,0 +1,303 @@
import re
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class Art19IE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{4}-?[\da-f]{12}'
_VALID_URL = [
rf'https?://(?:www\.)?art19\.com/shows/[^/#?]+/episodes/(?P<id>{_UUID_REGEX})',
rf'https?://rss\.art19\.com/episodes/(?P<id>{_UUID_REGEX})\.mp3',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL[0]})']
_TESTS = [{
'url': 'https://rss.art19.com/episodes/5ba1413c-48b8-472b-9cc3-cfd952340bdb.mp3',
'info_dict': {
'id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'ext': 'mp3',
'title': 'Why Did DeSantis Drop Out?',
'series': 'The Daily Briefing',
'release_timestamp': 1705941275,
'description': 'md5:da38961da4a3f7e419471365e3c6b49f',
'episode': 'Episode 582',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'series_id': 'ed52a0ab-08b1-4def-8afc-549e4d93296d',
'upload_date': '20240122',
'timestamp': 1705940815,
'episode_number': 582,
'modified_date': '20240122',
'episode_id': '5ba1413c-48b8-472b-9cc3-cfd952340bdb',
'modified_timestamp': 1705941275,
'release_date': '20240122',
'duration': 527.4,
},
}, {
'url': 'https://art19.com/shows/scamfluencers/episodes/8319b776-4153-4d22-8630-631f204a03dd',
'info_dict': {
'id': '8319b776-4153-4d22-8630-631f204a03dd',
'ext': 'mp3',
'title': 'Martha Stewart: The Homemaker Hustler Part 2',
'modified_date': '20240116',
'upload_date': '20240105',
'modified_timestamp': 1705435802,
'episode_id': '8319b776-4153-4d22-8630-631f204a03dd',
'series_id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'description': 'md5:4aa7cfd1358dc57e729835bc208d7893',
'release_timestamp': 1705305660,
'release_date': '20240115',
'timestamp': 1704481536,
'episode_number': 88,
'series': 'Scamfluencers',
'duration': 2588.37501,
'episode': 'Episode 88',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.nu.nl/formule-1/6291456/verstappen-wordt-een-synoniem-voor-formule-1.html',
'info_dict': {
'id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'ext': 'mp3',
'title': "'Verstappen wordt een synoniem voor Formule 1'",
'season': 'Seizoen 6',
'description': 'md5:39a7159a31c4cda312b2e893bdd5c071',
'episode_id': '7d42626a-7301-47db-bb8a-3b6f054d77d7',
'duration': 3061.82111,
'series_id': '93f4e113-2a60-4609-a564-755058fa40d8',
'release_date': '20231126',
'modified_timestamp': 1701156004,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'season_number': 6,
'episode_number': 52,
'modified_date': '20231128',
'upload_date': '20231126',
'timestamp': 1701025981,
'season_id': '36097c1e-7455-490d-a2fe-e2f10b4d5f26',
'series': 'De Boordradio',
'release_timestamp': 1701026308,
'episode': 'Episode 52',
},
}, {
'url': 'https://www.wishtv.com/podcast-episode/larry-bucshon-announces-retirement-from-congress/',
'info_dict': {
'id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'ext': 'mp3',
'title': 'Larry Bucshon announces retirement from congress',
'upload_date': '20240115',
'episode_number': 148,
'episode': 'Episode 148',
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20240115',
'timestamp': 1705328205,
'release_timestamp': 1705329275,
'series': 'All INdiana Politics',
'modified_date': '20240117',
'modified_timestamp': 1705458901,
'series_id': 'c4af6c27-b10f-4ff2-9f84-0f407df86ff1',
'episode_id': '8da368bd-08d1-46d0-afaa-c134a4af7dc0',
'description': 'md5:53b5239e4d14973a87125c217c255b2a',
'duration': 1256.18848,
},
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for episode_id in re.findall(
rf'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-episode-id=[\'"]({cls._UUID_REGEX})[\'"]', webpage):
yield f'https://rss.art19.com/episodes/{episode_id}.mp3'
def _real_extract(self, url):
episode_id = self._match_id(url)
player_metadata = self._download_json(
f'https://art19.com/episodes/{episode_id}', episode_id,
note='Downloading player metadata', fatal=False,
headers={'Accept': 'application/vnd.art19.v0+json'})
rss_metadata = self._download_json(
f'https://rss.art19.com/episodes/{episode_id}.json', episode_id, fatal=False,
note='Downloading RSS metadata')
formats = [{
'format_id': 'direct',
'url': f'https://rss.art19.com/episodes/{episode_id}.mp3',
'vcodec': 'none',
'acodec': 'mp3',
}]
for fmt_id, fmt_data in traverse_obj(rss_metadata, ('content', 'media', {dict.items}, ...)):
if fmt_id == 'waveform_bin':
continue
fmt_url = traverse_obj(fmt_data, ('url', {url_or_none}))
if not fmt_url:
continue
formats.append({
'format_id': fmt_id,
'url': fmt_url,
'vcodec': 'none',
'acodec': fmt_id,
'quality': -2 if fmt_id == 'ogg' else -1,
})
return {
'id': episode_id,
'formats': formats,
**traverse_obj(player_metadata, ('episode', {
'title': ('title', {str}),
'description': ('description_plain', {str}),
'episode_id': ('id', {str}),
'episode_number': ('episode_number', {int_or_none}),
'season_id': ('season_id', {str}),
'series_id': ('series_id', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'release_timestamp': ('released_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601})
})),
**traverse_obj(rss_metadata, ('content', {
'title': ('episode_title', {str}),
'description': ('episode_description_plain', {str}),
'episode_id': ('episode_id', {str}),
'episode_number': ('episode_number', {int_or_none}),
'season': ('season_title', {str}),
'season_id': ('season_id', {str}),
'season_number': ('season_number', {int_or_none}),
'series': ('series_title', {str}),
'series_id': ('series_id', {str}),
'thumbnail': ('cover_image', {url_or_none}),
'duration': ('duration', {float_or_none}),
})),
}
class Art19ShowIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?art19\.com/shows/(?P<id>[\w-]+)(?:/embed)?/?'
_VALID_URL = [
rf'{_VALID_URL_BASE}(?:$|[#?])',
r'https?://rss\.art19\.com/(?P<id>[\w-]+)/?(?:$|[#?])',
]
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL_BASE}[^\'"])']
_TESTS = [{
'url': 'https://www.art19.com/shows/5898c087-a14f-48dc-b6fc-a2280a1ff6e0/',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://www.art19.com/shows/echt-gebeurd',
'info_dict': {
'_type': 'playlist',
'id': '5898c087-a14f-48dc-b6fc-a2280a1ff6e0',
'display_id': 'echt-gebeurd',
'title': 'Echt Gebeurd',
'description': 'md5:5fd11dc80b76e51ffd34b6067fd5e560',
'timestamp': 1492642167,
'upload_date': '20170419',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:7',
},
'playlist_mincount': 425,
}, {
'url': 'https://rss.art19.com/scamfluencers',
'info_dict': {
'_type': 'playlist',
'id': 'd3c9b8ca-26b3-42f4-9bd8-21d1a9031e75',
'display_id': 'scamfluencers',
'title': 'Scamfluencers',
'description': 'md5:7d239d670c0ced6dadbf71c4caf764b7',
'timestamp': 1647368573,
'upload_date': '20220315',
'modified_timestamp': int,
'modified_date': str,
'tags': [],
},
'playlist_mincount': 90,
}, {
'url': 'https://art19.com/shows/enthuellt/embed',
'info_dict': {
'_type': 'playlist',
'id': 'e2cacf57-bb8a-4263-aa81-719bcdd4f80c',
'display_id': 'enthuellt',
'title': 'Enthüllt',
'description': 'md5:17752246643414a2fd51744fc9a1c08e',
'timestamp': 1601645860,
'upload_date': '20201002',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:10',
},
'playlist_mincount': 10,
}]
_WEBPAGE_TESTS = [{
'url': 'https://deconstructingyourself.com/deconstructing-yourself-podcast',
'info_dict': {
'_type': 'playlist',
'id': 'cfbb9b01-c295-4adb-8726-adde7c03cf21',
'display_id': 'deconstructing-yourself',
'title': 'Deconstructing Yourself',
'description': 'md5:dab5082b28b248a35476abf64768854d',
'timestamp': 1570581181,
'upload_date': '20191009',
'modified_timestamp': int,
'modified_date': str,
'tags': 'count:5',
},
'playlist_mincount': 80,
}, {
'url': 'https://chicagoreader.com/columns-opinion/podcasts/ben-joravsky-show-podcast-episodes/',
'info_dict': {
'_type': 'playlist',
'id': '9dfa2c37-ab87-4c13-8388-4897914313ec',
'display_id': 'the-ben-joravsky-show',
'title': 'The Ben Joravsky Show',
'description': 'md5:c0f3ec0ee0dbea764390e521adc8780a',
'timestamp': 1550875095,
'upload_date': '20190222',
'modified_timestamp': int,
'modified_date': str,
'tags': ['Chicago Politics', 'chicago', 'Ben Joravsky'],
},
'playlist_mincount': 1900,
}]
@classmethod
def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage)
for series_id in re.findall(
r'<div[^>]+\bclass=[\'"][^\'"]*art19-web-player[^\'"]*[\'"][^>]+\bdata-series-id=[\'"]([\w-]+)[\'"]', webpage):
yield f'https://art19.com/shows/{series_id}'
def _real_extract(self, url):
series_id = self._match_id(url)
series_metadata = self._download_json(
f'https://art19.com/series/{series_id}', series_id, note='Downloading series metadata',
headers={'Accept': 'application/vnd.art19.v0+json'})
return {
'_type': 'playlist',
'entries': [
self.url_result(f'https://rss.art19.com/episodes/{episode_id}.mp3', Art19IE)
for episode_id in traverse_obj(series_metadata, ('series', 'episode_ids', ..., {str}))
],
**traverse_obj(series_metadata, ('series', {
'id': ('id', {str}),
'display_id': ('slug', {str}),
'title': ('title', {str}),
'description': ('description_plain', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601}),
})),
'tags': traverse_obj(series_metadata, ('tags', ..., 'name', {str})),
}

View file

@ -70,7 +70,24 @@ class ArteTVIE(ArteTVBaseIE):
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
'upload_date': '20230930',
'ext': 'mp4',
}
},
}, {
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
'info_dict': {
'id': '085374-003-A',
'ext': 'mp4',
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
'timestamp': 1702872000,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
'duration': 2594,
'title': 'Die kurze Zeit der Jugend',
'alt_title': 'Im hohen Norden geboren',
'upload_date': '20231218',
'subtitles': {
'fr': 'mincount:1',
'fr-acc': 'mincount:1',
},
},
}]
_GEO_BYPASS = True
@ -121,6 +138,16 @@ class ArteTVIE(ArteTVBaseIE):
),
}
@staticmethod
def _fix_accessible_subs_locale(subs):
updated_subs = {}
for lang, sub_formats in subs.items():
for format in sub_formats:
if format.get('url', '').endswith('-MAL.m3u8'):
lang += '-acc'
updated_subs.setdefault(lang, []).append(format)
return updated_subs
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@ -174,6 +201,7 @@ def _real_extract(self, url):
secondary_formats.extend(fmts)
else:
formats.extend(fmts)
subs = self._fix_accessible_subs_locale(subs)
self._merge_subtitles(subs, target=subtitles)
elif stream['protocol'] in ('HTTPS', 'RTMP'):

View file

@ -0,0 +1,168 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
merge_dicts,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class AsobiChannelBaseIE(InfoExtractor):
_MICROCMS_HEADER = {'X-MICROCMS-API-KEY': 'qRaKehul9AHU8KtL0dnq1OCLKnFec6yrbcz3'}
def _extract_info(self, metadata):
return traverse_obj(metadata, {
'id': ('id', {str}),
'title': ('title', {str}),
'description': ('body', {clean_html}),
'thumbnail': ('contents', 'video_thumb', 'url', {url_or_none}),
'timestamp': ('publishedAt', {parse_iso8601}),
'modified_timestamp': ('updatedAt', {parse_iso8601}),
'channel': ('channel', 'name', {str}),
'channel_id': ('channel', 'id', {str}),
})
class AsobiChannelIE(AsobiChannelBaseIE):
IE_NAME = 'asobichannel'
IE_DESC = 'ASOBI CHANNEL'
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/watch/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://asobichannel.asobistore.jp/watch/1ypp48qd32p',
'md5': '39df74e872afe032c4eb27b89144fc92',
'info_dict': {
'id': '1ypp48qd32p',
'ext': 'mp4',
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
'description': 'md5:b930bd2199c9b2fd75951ce4aaa7efd2',
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/a8e6f84119f54eb9ab4ce16729239905/%E3%82%B5%E3%83%A0%E3%83%8D%20(1).png',
'timestamp': 1697098247,
'upload_date': '20231012',
'modified_timestamp': 1698381162,
'modified_date': '20231027',
'channel': 'アイドルマスター',
'channel_id': 'idolmaster',
},
}, {
'url': 'https://asobichannel.asobistore.jp/watch/redigiwnjzqj',
'md5': '229fa8fb5c591c75ce8c37a497f113f6',
'info_dict': {
'id': 'redigiwnjzqj',
'ext': 'mp4',
'title': '【おまけ放送】アイドルマスター ミリオンライブ! 765プロch 原っぱ通信 #1',
'description': 'md5:7d9cd35fb54425a6967822bd564ea2d9',
'thumbnail': 'https://images.microcms-assets.io/assets/d2420de4b9194e11beb164f99edb1f95/20e5c1d6184242eebc2512a5dec59bf0/P1_%E5%8E%9F%E3%81%A3%E3%81%B1%E3%82%B5%E3%83%A0%E3%83%8D.png',
'modified_timestamp': 1697797125,
'modified_date': '20231020',
'timestamp': 1697261769,
'upload_date': '20231014',
'channel': 'アイドルマスター',
'channel_id': 'idolmaster',
},
}]
_survapi_header = None
def _real_initialize(self):
token = self._download_json(
'https://asobichannel-api.asobistore.jp/api/v1/vspf/token', None,
note='Retrieving API token')
self._survapi_header = {'Authorization': f'Bearer {token}'}
def _process_vod(self, video_id, metadata):
content_id = metadata['contents']['video_id']
vod_data = self._download_json(
f'https://survapi.channel.or.jp/proxy/v1/contents/{content_id}/get_by_cuid', video_id,
headers=self._survapi_header, note='Downloading vod data')
return {
'formats': self._extract_m3u8_formats(vod_data['ex_content']['streaming_url'], video_id),
}
def _process_live(self, video_id, metadata):
content_id = metadata['contents']['video_id']
event_data = self._download_json(
f'https://survapi.channel.or.jp/ex/events/{content_id}?embed=channel', video_id,
headers=self._survapi_header, note='Downloading event data')
player_type = traverse_obj(event_data, ('data', 'Player_type', {str}))
if player_type == 'poster':
self.raise_no_formats('Live event has not yet started', expected=True)
live_status = 'is_upcoming'
formats = []
elif player_type == 'player':
live_status = 'is_live'
formats = self._extract_m3u8_formats(
event_data['data']['Channel']['Custom_live_url'], video_id, live=True)
else:
raise ExtractorError('Unsupported player type {player_type!r}')
return {
'release_timestamp': traverse_obj(metadata, ('period', 'start', {parse_iso8601})),
'live_status': live_status,
'formats': formats,
}
def _real_extract(self, url):
video_id = self._match_id(url)
metadata = self._download_json(
f'https://channel.microcms.io/api/v1/media/{video_id}', video_id,
headers=self._MICROCMS_HEADER)
info = self._extract_info(metadata)
video_type = traverse_obj(metadata, ('contents', 'video_type', 0, {str}))
if video_type == 'VOD':
return merge_dicts(info, self._process_vod(video_id, metadata))
if video_type == 'LIVE':
return merge_dicts(info, self._process_live(video_id, metadata))
raise ExtractorError(f'Unexpected video type {video_type!r}')
class AsobiChannelTagURLIE(AsobiChannelBaseIE):
IE_NAME = 'asobichannel:tag'
IE_DESC = 'ASOBI CHANNEL'
_VALID_URL = r'https?://asobichannel\.asobistore\.jp/tag/(?P<id>[a-z0-9-_]+)'
_TESTS = [{
'url': 'https://asobichannel.asobistore.jp/tag/bjhh-nbcja',
'info_dict': {
'id': 'bjhh-nbcja',
'title': 'アイドルマスター ミリオンライブ! 765プロch 原っぱ通信',
},
'playlist_mincount': 16,
}, {
'url': 'https://asobichannel.asobistore.jp/tag/hvm5qw3c6od',
'info_dict': {
'id': 'hvm5qw3c6od',
'title': 'アイマスMOIW2023ラジオ',
},
'playlist_mincount': 13,
}]
def _real_extract(self, url):
tag_id = self._match_id(url)
webpage = self._download_webpage(url, tag_id)
title = traverse_obj(self._search_nextjs_data(
webpage, tag_id, fatal=False), ('props', 'pageProps', 'data', 'name', {str}))
media = self._download_json(
f'https://channel.microcms.io/api/v1/media?limit=999&filters=(tag[contains]{tag_id})',
tag_id, headers=self._MICROCMS_HEADER)
def entries():
for metadata in traverse_obj(media, ('contents', lambda _, v: v['id'])):
yield {
'_type': 'url',
'url': f'https://asobichannel.asobistore.jp/watch/{metadata["id"]}',
'ie_key': AsobiChannelIE.ie_key(),
**self._extract_info(metadata),
}
return self.playlist_result(entries(), tag_id, title)

View file

@ -1,53 +0,0 @@
from .common import InfoExtractor
from ..utils import unified_strdate
class ATTTechChannelIE(InfoExtractor):
_VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
_TEST = {
'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
'info_dict': {
'id': '11316',
'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
'ext': 'flv',
'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140127',
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_url = self._search_regex(
r"url\s*:\s*'(rtmp://[^']+)'",
webpage, 'video URL')
video_id = self._search_regex(
r'mediaid\s*=\s*(\d+)',
webpage, 'video id', fatal=False)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
webpage, 'upload date', fatal=False), False)
return {
'id': video_id,
'display_id': display_id,
'url': video_url,
'ext': 'flv',
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
}

View file

@ -152,7 +152,7 @@ def page_func(page_num):
'sort': 'new',
'limit': self._PAGE_SIZE,
'offset': page_num * self._PAGE_SIZE,
}, note=f'Downloading page {page_num+1}')
}, note=f'Downloading page {page_num + 1}')
return [
self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
for video in data['items']

View file

@ -317,16 +317,25 @@ def _raise_extractor_error(self, media_selection_error):
def _download_media_selector(self, programme_id):
last_exception = None
formats, subtitles = [], {}
for media_set in self._MEDIA_SETS:
try:
return self._download_media_selector_url(
fmts, subs = self._download_media_selector_url(
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
formats.extend(fmts)
if subs:
self._merge_subtitles(subs, target=subtitles)
except BBCCoUkIE.MediaSelectionError as e:
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
last_exception = e
continue
self._raise_extractor_error(e)
self._raise_extractor_error(last_exception)
if last_exception:
if formats or subtitles:
self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
else:
self._raise_extractor_error(last_exception)
return formats, subtitles
def _download_media_selector_url(self, url, programme_id=None):
media_selection = self._download_json(
@ -1188,7 +1197,7 @@ def _real_extract(self, url):
if initial_data is None:
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
'preload state', default={})
'preload state', default='{}')
else:
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)

View file

@ -3,14 +3,13 @@
class BeatBumpVideoIE(InfoExtractor):
_VALID_URL = r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)'
_VALID_URL = r'https://beatbump\.(?:ml|io)/listen\?id=(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
'md5': '5ff3fff41d3935b9810a9731e485fe66',
'info_dict': {
'id': 'MgNrAu2pzNs',
'ext': 'mp4',
'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
'artist': 'Stephen',
'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
@ -22,10 +21,9 @@ class BeatBumpVideoIE(InfoExtractor):
'alt_title': 'Voyeur Girl',
'view_count': int,
'track': 'Voyeur Girl',
'uploader': 'Stephen - Topic',
'uploader': 'Stephen',
'title': 'Voyeur Girl',
'channel_follower_count': int,
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
'age_limit': 0,
'availability': 'public',
'live_status': 'not_live',
@ -36,7 +34,12 @@ class BeatBumpVideoIE(InfoExtractor):
'tags': 'count:11',
'creator': 'Stephen',
'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
}
'channel_is_verified': True,
'heatmap': 'count:100',
},
}, {
'url': 'https://beatbump.io/listen?id=LDGZAprNGWo',
'only_matching': True,
}]
def _real_extract(self, url):
@ -45,7 +48,7 @@ def _real_extract(self, url):
class BeatBumpPlaylistIE(InfoExtractor):
_VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
_VALID_URL = r'https://beatbump\.(?:ml|io)/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
'playlist_count': 50,
@ -56,25 +59,28 @@ class BeatBumpPlaylistIE(InfoExtractor):
'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
'description': '',
'tags': [],
'modified_date': '20221223',
}
'modified_date': '20231110',
},
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
'playlist_mincount': 1,
'params': {'flatplaylist': True},
'info_dict': {
'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
'uploader_id': '@NoCopyrightSounds',
'channel_follower_count': int,
'title': 'NoCopyrightSounds - Videos',
'title': 'NoCopyrightSounds',
'uploader': 'NoCopyrightSounds',
'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
'channel': 'NoCopyrightSounds',
'tags': 'count:12',
'tags': 'count:65',
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
'channel_is_verified': True,
},
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'playlist_mincount': 1,
@ -84,16 +90,20 @@ class BeatBumpPlaylistIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
'view_count': int,
'channel_url': 'https://www.youtube.com/@NoCopyrightSounds',
'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
'uploader_id': '@NoCopyrightSounds',
'title': 'NCS : All Releases 💿',
'uploader': 'NoCopyrightSounds',
'availability': 'public',
'channel': 'NoCopyrightSounds',
'tags': [],
'modified_date': '20221225',
'modified_date': '20231112',
'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
}
},
'expected_warnings': ['YouTube Music is not directly supported'],
}, {
'url': 'https://beatbump.io/playlist/VLPLFCHGavqRG-q_2ZhmgU2XB2--ZY6irT1c',
'only_matching': True,
}]
def _real_extract(self, url):

View file

@ -3,6 +3,7 @@
class BehindKinkIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
_TEST = {
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',

View file

@ -1,10 +1,9 @@
from .mtv import MTVServicesInfoExtractor
from ..utils import unified_strdate
# TODO Remove - Reason: Outdated Site
class BetIE(MTVServicesInfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
_TESTS = [
{

View file

@ -5,6 +5,7 @@
class BFIPlayerIE(InfoExtractor):
_WORKING = False
IE_NAME = 'bfi:player'
_VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
_TEST = {

View file

@ -7,7 +7,7 @@
class BFMTVBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _brightcove_url_result(self, video_id, video_block):
@ -55,8 +55,11 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
'ext': 'mp4',
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader_id': '876450610001',
'upload_date': '20171018',
'timestamp': 1508329950,
'upload_date': '20220926',
'timestamp': 1664207191,
'live_status': 'is_live',
'thumbnail': r're:https://.+/image\.jpg',
'tags': [],
},
'params': {
'skip_download': True,

View file

@ -29,7 +29,8 @@ def _real_extract(self, url):
info_raw = self._download_json(
'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo',
user_id, data=urlencode_postdata({'siteId': user_id}))
user_id, data=urlencode_postdata({'siteId': user_id}),
headers={'Accept': 'application/json'})
if not isinstance(info_raw, dict):
raise ExtractorError('Received invalid JSON data')

View file

@ -2,6 +2,7 @@
import functools
import hashlib
import itertools
import json
import math
import re
import time
@ -16,9 +17,12 @@
InAdvancePagedList,
OnDemandPagedList,
bool_or_none,
clean_html,
determine_ext,
filter_dict,
float_or_none,
format_field,
get_element_by_class,
int_or_none,
join_nonempty,
make_archive_id,
@ -115,6 +119,12 @@ def extract_formats(self, play_info, is_dash=True):
return formats
def _download_playinfo(self, video_id, cid):
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note=f'Downloading video formats for cid {cid}')['data']
def json2srt(self, json_data):
srt_data = ''
for idx, line in enumerate(json_data.get('body') or []):
@ -123,7 +133,7 @@ def json2srt(self, json_data):
f'{line["content"]}\n\n')
return srt_data
def _get_subtitles(self, video_id, aid, cid):
def _get_subtitles(self, video_id, cid, aid=None):
subtitles = {
'danmaku': [{
'ext': 'xml',
@ -131,8 +141,15 @@ def _get_subtitles(self, video_id, aid, cid):
}]
}
video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
subtitle_info = traverse_obj(self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
for s in subs_list:
subtitles.setdefault(s['lan'], []).append({
'ext': 'srt',
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
@ -182,7 +199,7 @@ def _get_episodes_from_season(self, ss_id, url):
for entry in traverse_obj(season_info, (
'result', 'main_section', 'episodes',
lambda _, v: url_or_none(v['share_url']) and v['id'])):
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
def _enc_wbi(self, params: dict, video_id=None):
if video_id is None:
@ -221,6 +238,53 @@ def _get_play_url(self, bvid: str, cid: str, headers, qn: int = None, is_dash: b
note='Extracting' + (' dash ' if is_dash else ' non-dash ') + 'video formats',
headers=headers)['data']
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
cid_edges = cid_edges or {}
division_data = self._download_json(
'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
note=f'Extracting divisions from edge {edge_id}')
edges.setdefault(edge_id, {}).update(
traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
'title': ('title', {str}),
'cid': ('cid', {int_or_none}),
}), get_all=False))
edges[edge_id].update(traverse_obj(division_data, ('data', {
'title': ('title', {str}),
'choices': ('edges', 'questions', ..., 'choices', ..., {
'edge_id': ('id', {int_or_none}),
'cid': ('cid', {int_or_none}),
'text': ('option', {str}),
}),
})))
# use dict to combine edges that use the same video section (same cid)
cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
if choice['edge_id'] not in edges:
edges[choice['edge_id']] = {'cid': choice['cid']}
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
return cid_edges
def _get_interactive_entries(self, video_id, cid, metainfo):
graph_version = traverse_obj(
self._download_json(
'https://api.bilibili.com/x/player/wbi/v2', video_id,
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid)
yield {
**metainfo,
'id': f'{video_id}_{cid}',
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
'formats': self.extract_formats(play_info),
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(video_id, cid),
}
class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
@ -244,7 +308,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
},
}, {
# old av URL version
'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': {
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
@ -427,11 +491,95 @@ class BiliBiliIE(BilibiliBaseIE):
'upload_date': '20160317',
},
'params': {'skip_download': True}
}, {
'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
'info_dict': {
'id': 'BV1af4y1H7ga',
'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
'timestamp': 1630500414,
'upload_date': '20210901',
'description': 'md5:01113e39ab06e28042d74ac356a08786',
'tags': list,
'uploader': '钉宫妮妮Ninico',
'duration': 1503,
'uploader_id': '8881297',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'playlist_count': 33,
'playlist': [{
'info_dict': {
'id': 'BV1af4y1H7ga_400950101',
'ext': 'mp4',
'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
'timestamp': 1630500414,
'upload_date': '20210901',
'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
'tags': list,
'uploader': '钉宫妮妮Ninico',
'duration': 11.605,
'uploader_id': '8881297',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}, {
'note': '301 redirect to bangumi link',
'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
'info_dict': {
'id': '288525',
'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
'ext': 'mp4',
'series': '我和我的祖国',
'series_id': '4780',
'season': '幕后纪实',
'season_id': '28609',
'season_number': 1,
'episode': '钱学森弹道和乘波体飞行器是什么?',
'episode_id': '288525',
'episode_number': 105,
'duration': 1183.957,
'timestamp': 1571648124,
'upload_date': '20191021',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': {
'id': 'BV1jL41167ZG',
'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
'ext': 'mp4',
},
'skip': 'supporter-only video',
}, {
'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
'info_dict': {
'id': 'BV1Ks411f7aQ',
'title': '【BD1080P】狼与香辛料I【华盟】',
'ext': 'mp4',
},
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
'info_dict': {
'id': 'BV1GJ411x7h7',
'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
'ext': 'mp4',
},
'skip': 'geo-restricted',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
headers = {'Referer': url, **self.geo_verification_headers()}
play_info = {}
@ -440,7 +588,25 @@ def _real_extract(self, url):
if is_festival:
video_data = initial_state['videoInfo']
else:
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
play_info_obj = self._search_json(
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
if not play_info_obj:
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
self.raise_login_required()
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
raise ExtractorError(
'This video may be deleted or geo-restricted. '
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
play_info = traverse_obj(play_info_obj, ('data', {dict}))
if not play_info:
if traverse_obj(play_info_obj, 'code') == 87007:
toast = get_element_by_class('tips-toast', webpage) or ''
msg = clean_html(
f'{get_element_by_class("belongs-to", toast) or ""}'
+ (get_element_by_class('level', toast) or ''))
raise ExtractorError(
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
raise ExtractorError('Failed to extract play info')
video_data = initial_state['videoData']
video_id, title = video_data['bvid'], video_data.get('title')
@ -505,25 +671,57 @@ def _real_extract(self, url):
'formats': self.extract_formats(play_info) if is_dash else self.extract_formats(play_info, is_dash=False),
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
'title': title,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, aid, cid),
'__post_extractor': self.extract_comments(aid),
'http_headers': {'Referer': url},
}
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive:
return self.playlist_result(
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
'__post_extractor': self.extract_comments(aid),
})
else:
return {
**metainfo,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'formats': self.extract_formats(play_info),
'__post_extractor': self.extract_comments(aid),
}
class BiliBiliBangumiIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
'info_dict': {
'id': '21495',
'ext': 'mp4',
'series': '悠久之翼',
'series_id': '774',
'season': '第二季',
'season_id': '1182',
'season_number': 2,
'episode': 'foreveref',
'episode_id': '21495',
'episode_number': 12,
'title': '12 foreveref',
'duration': 1420.791,
'timestamp': 1320412200,
'upload_date': '20111104',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
'info_dict': {
'id': '267851',
'ext': 'mp4',
'series': '鬼灭之刃',
'series_id': '4358',
'season': '鬼灭之刃',
'season': '立志篇',
'season_id': '26801',
'season_number': 1,
'episode': '残酷',
@ -535,13 +733,32 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
'skip': 'Geo-restricted',
}, {
'note': 'a making-of which falls outside main section',
'url': 'https://www.bilibili.com/bangumi/play/ep345120',
'info_dict': {
'id': '345120',
'ext': 'mp4',
'series': '鬼灭之刃',
'series_id': '4358',
'season': '立志篇',
'season_id': '26801',
'season_number': 1,
'episode': '炭治郎篇',
'episode_id': '345120',
'episode_number': 27,
'title': '#1 炭治郎篇',
'duration': 1922.129,
'timestamp': 1602853860,
'upload_date': '20201016',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
episode_id = video_id[2:]
webpage = self._download_webpage(url, video_id)
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
@ -550,7 +767,7 @@ def _real_extract(self, url):
headers = {'Referer': url, **self.geo_verification_headers()}
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
headers=headers)
premium_only = play_info.get('code') == -10403
@ -561,40 +778,43 @@ def _real_extract(self, url):
self.raise_login_required('This video is for premium members only')
bangumi_info = self._download_json(
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
query={'ep_id': episode_id}, headers=headers)['result']
episode_number, episode_info = next((
(idx, ep) for idx, ep in enumerate(traverse_obj(
bangumi_info, ('episodes', ..., {dict})), 1)
bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
if str_or_none(ep.get('id')) == episode_id), (1, {}))
season_id = bangumi_info.get('season_id')
season_number = season_id and next((
idx + 1 for idx, e in enumerate(
season_number, season_title = season_id and next((
(idx + 1, e.get('season_title')) for idx, e in enumerate(
traverse_obj(bangumi_info, ('seasons', ...)))
if e.get('season_id') == season_id
), None)
), (None, None))
aid = episode_info.get('aid')
return {
'id': video_id,
'id': episode_id,
'formats': formats,
**traverse_obj(bangumi_info, {
'series': ('series', 'series_title', {str}),
'series_id': ('series', 'series_id', {str_or_none}),
'thumbnail': ('square_cover', {url_or_none}),
}),
'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
'episode': episode_info.get('long_title'),
**traverse_obj(episode_info, {
'episode': ('long_title', {str}),
'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
'timestamp': ('pub_time', {int_or_none}),
'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
}),
'episode_id': episode_id,
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
'season': str_or_none(season_title),
'season_id': str_or_none(season_id),
'season_number': season_number,
'timestamp': int_or_none(episode_info.get('pub_time')),
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
'__post_extractor': self.extract_comments(aid),
'http_headers': headers,
}
@ -606,17 +826,53 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
'url': 'https://www.bilibili.com/bangumi/media/md24097891',
'info_dict': {
'id': '24097891',
'title': 'CAROLE & TUESDAY',
'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
},
'playlist_mincount': 25,
}, {
'url': 'https://www.bilibili.com/bangumi/media/md1565/',
'info_dict': {
'id': '1565',
'title': '攻壳机动队 S.A.C. 2nd GIG',
'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
},
'playlist_count': 26,
'playlist': [{
'info_dict': {
'id': '68540',
'ext': 'mp4',
'series': '攻壳机动队',
'series_id': '1077',
'season': '第二季',
'season_id': '1565',
'season_number': 2,
'episode': '再启动 REEMBODY',
'episode_id': '68540',
'episode_number': 1,
'title': '1 再启动 REEMBODY',
'duration': 1525.777,
'timestamp': 1425074413,
'upload_date': '20150227',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}],
}]
def _real_extract(self, url):
media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id)
ss_id = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
initial_state = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
ss_id = initial_state['mediaInfo']['season_id']
return self.playlist_result(
self._get_episodes_from_season(ss_id, url), media_id,
**traverse_obj(initial_state, ('mediaInfo', {
'title': ('title', {str}),
'description': ('evaluate', {str}),
})))
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
@ -624,15 +880,183 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
'info_dict': {
'id': '26801'
'id': '26801',
'title': '鬼灭之刃',
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
},
'playlist_mincount': 26
}, {
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
'info_dict': {
'id': '2251',
'title': '玲音',
'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
},
'playlist_count': 13,
'playlist': [{
'info_dict': {
'id': '50188',
'ext': 'mp4',
'series': '玲音',
'series_id': '1526',
'season': 'TV',
'season_id': '2251',
'season_number': 1,
'episode': 'WEIRD',
'episode_id': '50188',
'episode_number': 1,
'title': '1 WEIRD',
'duration': 1436.992,
'timestamp': 1343185080,
'upload_date': '20120725',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
},
}],
}]
def _real_extract(self, url):
ss_id = self._match_id(url)
webpage = self._download_webpage(url, ss_id)
metainfo = traverse_obj(
self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
('itemListElement', ..., {
'title': ('name', {str}),
'description': ('description', {str}),
}), get_all=False)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _extract_episode(self, season_info, ep_id):
episode_info = traverse_obj(season_info, (
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
aid, cid = episode_info['aid'], episode_info['cid']
if traverse_obj(episode_info, 'ep_status') == -1:
raise ExtractorError('This course episode is not yet available.', expected=True)
if not traverse_obj(episode_info, 'playable'):
self.raise_login_required('You need to purchase the course to download this episode')
play_info = self._download_json(
'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
headers=self._HEADERS, note='Downloading playinfo')['data']
return {
'id': str_or_none(ep_id),
'episode_id': str_or_none(ep_id),
'formats': self.extract_formats(play_info),
'extractor_key': BilibiliCheeseIE.ie_key(),
'extractor': BilibiliCheeseIE.IE_NAME,
'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
**traverse_obj(episode_info, {
'episode': ('title', {str}),
'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
'alt_title': ('subtitle', {str}),
'duration': ('duration', {int_or_none}),
'episode_number': ('index', {int_or_none}),
'thumbnail': ('cover', {url_or_none}),
'timestamp': ('release_date', {int_or_none}),
'view_count': ('play', {int_or_none}),
}),
**traverse_obj(season_info, {
'uploader': ('up_info', 'uname', {str}),
'uploader_id': ('up_info', 'mid', {str_or_none}),
}),
'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
'__post_extractor': self.extract_comments(aid),
'http_headers': self._HEADERS,
}
def _download_season_info(self, query_key, video_id):
return self._download_json(
f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
headers=self._HEADERS, note='Downloading season info')['data']
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/cheese/play/ep229832',
'info_dict': {
'id': '229832',
'ext': 'mp4',
'title': '1 - 课程先导片',
'alt_title': '视频课·3分41秒',
'uploader': '马督工',
'uploader_id': '316568752',
'episode': '课程先导片',
'episode_id': '229832',
'episode_number': 1,
'duration': 221,
'timestamp': 1695549606,
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
}]
def _real_extract(self, url):
ep_id = self._match_id(url)
return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/cheese/play/ss5918',
'info_dict': {
'id': '5918',
'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
},
'playlist': [{
'info_dict': {
'id': '229832',
'ext': 'mp4',
'title': '1 - 课程先导片',
'alt_title': '视频课·3分41秒',
'uploader': '马督工',
'uploader_id': '316568752',
'episode': '课程先导片',
'episode_id': '229832',
'episode_number': 1,
'duration': 221,
'timestamp': 1695549606,
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
}],
'params': {'playlist_items': '1'},
}, {
'url': 'https://www.bilibili.com/cheese/play/ss5918',
'info_dict': {
'id': '5918',
'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
},
'playlist_mincount': 5,
'skip': 'paid video in list',
}]
def _get_cheese_entries(self, season_info):
for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
yield self._extract_episode(season_info, ep_id)
def _real_extract(self, url):
season_id = self._match_id(url)
season_info = self._download_season_info('season_id', season_id)
return self.playlist_result(
self._get_cheese_entries(season_info), season_id,
**traverse_obj(season_info, {
'title': ('title', {str}),
'description': ('subtitle', {str}),
}))
class BilibiliSpaceBaseIE(BilibiliBaseIE):
@ -1267,6 +1691,7 @@ def _real_extract(self, url):
class BiliIntlBaseIE(InfoExtractor):
_API_URL = 'https://api.bilibili.tv/intl/gateway'
_NETRC_MACHINE = 'biliintl'
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _call_api(self, endpoint, *args, **kwargs):
json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
@ -1304,19 +1729,34 @@ def _get_subtitles(self, *, ep_id=None, aid=None):
'aid': aid,
})) or {}
subtitles = {}
for sub in sub_json.get('subtitles') or []:
sub_url = sub.get('url')
if not sub_url:
continue
sub_data = self._download_json(
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
if not sub_data:
continue
subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
'ext': 'srt',
'data': self.json2srt(sub_data)
})
fetched_urls = set()
for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
if url in fetched_urls:
continue
fetched_urls.add(url)
sub_ext = determine_ext(url)
sub_lang = sub.get('lang_key') or 'en'
if sub_ext == 'ass':
subtitles.setdefault(sub_lang, []).append({
'ext': 'ass',
'url': url,
})
elif sub_ext == 'json':
sub_data = self._download_json(
url, ep_id or aid, fatal=False,
note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
errnote='Unable to download subtitles')
if sub_data:
subtitles.setdefault(sub_lang, []).append({
'ext': 'srt',
'data': self.json2srt(sub_data),
})
else:
self.report_warning('Unexpected subtitle extension', ep_id or aid)
return subtitles
def _get_formats(self, *, ep_id=None, aid=None):
@ -1362,7 +1802,9 @@ def _get_formats(self, *, ep_id=None, aid=None):
def _parse_video_metadata(self, video_data):
return {
'title': video_data.get('title_display') or video_data.get('title'),
'description': video_data.get('desc'),
'thumbnail': video_data.get('cover'),
'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
'episode_number': int_or_none(self._search_regex(
r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
}
@ -1460,17 +1902,6 @@ class BiliIntlIE(BiliIntlBaseIE):
'episode_number': 140,
},
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
}, {
'url': 'https://www.bilibili.tv/en/video/2041863208',
'info_dict': {
'id': '2041863208',
'ext': 'mp4',
'timestamp': 1670874843,
'description': 'Scheduled for April 2023.\nStudio: ufotable',
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
'upload_date': '20221212',
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
},
}, {
# episode comment extraction
'url': 'https://www.bilibili.tv/en/play/34580/340317',
@ -1511,9 +1942,9 @@ class BiliIntlIE(BiliIntlBaseIE):
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
'timestamp': 1667891924,
'upload_date': '20221108',
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
'comment_count': int,
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
},
'params': {
'getcomments': True
@ -1577,10 +2008,12 @@ def _extract_video_metadata(self, url, video_id, season_id):
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
return merge_dicts(
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
'title': self._html_search_meta('og:title', webpage),
'description': self._html_search_meta('og:description', webpage)
})
self._parse_video_metadata(video_data), {
'title': get_element_by_class(
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
'description': get_element_by_class(
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
}, self._search_json_ld(webpage, video_id, default={}))
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
comment_api_raw_data = self._download_json(
@ -1668,7 +2101,8 @@ def _real_extract(self, url):
'formats': self._get_formats(ep_id=ep_id, aid=aid),
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
'chapters': chapters,
'__post_extractor': self.extract_comments(video_id, ep_id)
'__post_extractor': self.extract_comments(video_id, ep_id),
'http_headers': self._HEADERS,
}

View file

@ -1,110 +0,0 @@
from .common import InfoExtractor
from .vk import VKIE
from ..compat import compat_b64decode
from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
unified_timestamp,
)
class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
'url': 'https://biqle.ru/watch/-2000421746_85421746',
'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
'info_dict': {
'id': '-2000421746_85421746',
'ext': 'mp4',
'title': 'Forsaken By Hope Studio Clip',
'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
'upload_date': '19700101',
'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
'timestamp': 0,
},
}, {
'url': 'http://biqle.org/watch/-44781847_168547604',
'md5': '7f24e72af1db0edf7c1aaba513174f97',
'info_dict': {
'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
'timestamp': 1396633454,
'upload_date': '20140404',
'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta('name', webpage, 'Title', fatal=False)
timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
description = self._html_search_meta('description', webpage, 'Description', default=None)
global_embed_url = self._search_regex(
r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
webpage, 'global Embed url')
hash = self._search_regex(
r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
embed_url = global_embed_url + hash
if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id)
embed_page = self._download_webpage(
embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
glob_params = self._parse_json(self._search_regex(
r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
host_name = compat_b64decode(glob_params['server'][::-1]).decode()
item = self._download_json(
f'https://{host_name}/method/video.get/{video_id}', video_id,
headers={'Referer': url}, query={
'token': glob_params['video']['access_token'],
'videos': video_id,
'ckey': glob_params['c_key'],
'credentials': glob_params['video']['credentials'],
})['response']['items'][0]
formats = []
for f_id, f_url in item.get('files', {}).items():
if f_id == 'external':
return self.url_result(f_url)
ext, height = f_id.split('_')
height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
if height_extra_key:
formats.append({
'format_id': f'{height}p',
'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
'height': int_or_none(height),
'ext': ext,
})
thumbnails = []
for k, v in item.items():
if k.startswith('photo_') and v:
width = k.replace('photo_', '')
thumbnails.append({
'id': width,
'url': v,
'width': int_or_none(width),
})
return {
'id': video_id,
'title': title,
'formats': formats,
'comment_count': int_or_none(item.get('comments')),
'description': description,
'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails,
'timestamp': timestamp,
'view_count': int_or_none(item.get('views')),
}

View file

@ -7,8 +7,10 @@
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class,
int_or_none,
orderedSet,
@ -17,6 +19,7 @@
traverse_obj,
unified_strdate,
urlencode_postdata,
urljoin,
)
@ -34,6 +37,25 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
}, {
# test case: video with different channel and uploader
'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
'md5': 'f10e6a8e787766235946d0868703f1d0',
'info_dict': {
'id': 'Yti_j9A-UZ4',
'ext': 'mp4',
'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'sharylattkisson',
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
},
}, {
# video not downloadable in browser, but we can recover it
@ -48,6 +70,9 @@ class BitChuteIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
},
'params': {'check_formats': None},
}, {
@ -99,6 +124,11 @@ def _raise_if_restricted(self, webpage):
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
self.raise_geo_restricted(reason)
@staticmethod
def _make_url(html):
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
return urljoin('https://www.bitchute.com', path)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
@ -121,12 +151,19 @@ def _real_extract(self, url):
'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id)
details = get_element_by_class('details', webpage) or ''
uploader_html = get_element_html_by_class('creator', details) or ''
channel_html = get_element_html_by_class('name', details) or ''
return {
'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(get_element_by_class('owner', webpage)),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats,
@ -154,6 +191,9 @@ class BitChuteChannelIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16,
'view_count': int,
},
@ -169,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': {
'id': 'wV9Imujxasw9',
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
'description': 'md5:747724ef404eebdfc04277714f81863e',
}
}]

View file

@ -1,58 +0,0 @@
from .common import InfoExtractor
class BitwaveReplayIE(InfoExtractor):
IE_NAME = 'bitwave:replay'
_VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<user>\w+)/replay/(?P<id>\w+)/?$'
_TEST = {
'url': 'https://bitwave.tv/RhythmicCarnage/replay/z4P6eq5L7WDrM85UCrVr',
'only_matching': True
}
def _real_extract(self, url):
replay_id = self._match_id(url)
replay = self._download_json(
'https://api.bitwave.tv/v1/replays/' + replay_id,
replay_id
)
return {
'id': replay_id,
'title': replay['data']['title'],
'uploader': replay['data']['name'],
'uploader_id': replay['data']['name'],
'url': replay['data']['url'],
'thumbnails': [
{'url': x} for x in replay['data']['thumbnails']
],
}
class BitwaveStreamIE(InfoExtractor):
IE_NAME = 'bitwave:stream'
_VALID_URL = r'https?://(?:www\.)?bitwave\.tv/(?P<id>\w+)/?$'
_TEST = {
'url': 'https://bitwave.tv/doomtube',
'only_matching': True
}
def _real_extract(self, url):
username = self._match_id(url)
channel = self._download_json(
'https://api.bitwave.tv/v1/channels/' + username,
username)
formats = self._extract_m3u8_formats(
channel['data']['url'], username,
'mp4')
return {
'id': username,
'title': channel['data']['title'],
'uploader': username,
'uploader_id': username,
'formats': formats,
'thumbnail': channel['data']['thumbnail'],
'is_live': True,
'view_count': channel['data']['viewCount']
}

View file

@ -22,7 +22,7 @@ class BleacherReportIE(InfoExtractor):
'upload_date': '20150615',
'uploader': 'Team Stream Now ',
},
'add_ie': ['Ooyala'],
'skip': 'Video removed',
}, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': '6a5cd403418c7b01719248ca97fb0692',
@ -70,8 +70,6 @@ def _real_extract(self, url):
video_type = video['type']
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':

View file

@ -1,16 +1,17 @@
import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
determine_ext,
parse_iso8601,
# try_get,
update_url_query,
url_or_none,
)
from ..utils.traversal import traverse_obj
class BoxIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)/file/(?P<id>\d+)'
_TEST = {
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
@ -18,11 +19,12 @@ class BoxIE(InfoExtractor):
'id': '510727257538',
'ext': 'mp4',
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
'uploader': 'MLS Video',
'uploader': '',
'timestamp': 1566320259,
'upload_date': '20190820',
'uploader_id': '235196876',
}
},
'params': {'skip_download': 'dash fragment too small'},
}
def _real_extract(self, url):
@ -58,26 +60,15 @@ def _real_extract(self, url):
formats = []
# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
# entry_url_template = try_get(
# entry, lambda x: x['content']['url_template'])
# if not entry_url_template:
# continue
# representation = entry.get('representation')
# if representation == 'dash':
# TODO: append query to every fragment URL
# formats.extend(self._extract_mpd_formats(
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
# file_id, query=query))
authenticated_download_url = f.get('authenticated_download_url')
if authenticated_download_url and f.get('is_download_available'):
formats.append({
'ext': f.get('extension') or determine_ext(title),
'filesize': f.get('size'),
'format_id': 'download',
'url': update_url_query(authenticated_download_url, query),
})
for url_tmpl in traverse_obj(f, (
'representations', 'entries', lambda _, v: v['representation'] == 'dash',
'content', 'url_template', {url_or_none}
)):
manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query)
fmts = self._extract_mpd_formats(manifest_url, file_id)
for fmt in fmts:
fmt['extra_param_to_segment_url'] = urllib.parse.urlparse(manifest_url).query
formats.extend(fmts)
creator = f.get('created_by') or {}

View file

@ -1,18 +1,15 @@
import json
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
parse_duration,
parse_iso8601,
xpath_element,
xpath_text,
)
class BRIE(InfoExtractor):
_WORKING = False
IE_DESC = 'Bayerischer Rundfunk'
_VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'
@ -167,142 +164,3 @@ def _extract_thumbnails(self, variants, base_url):
} for variant in variants.findall('variant') if xpath_text(variant, 'url')]
thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True)
return thumbnails
class BRMediathekIE(InfoExtractor):
IE_DESC = 'Bayerischer Rundfunk Mediathek'
_VALID_URL = r'https?://(?:www\.)?br\.de/mediathek//?video/(?:[^/?&#]+?-)?(?P<id>av:[0-9a-f]{24})'
_TESTS = [{
'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
'md5': 'fdc3d485835966d1622587d08ba632ec',
'info_dict': {
'id': 'av:5a1e6a6e8fce6d001871cc8e',
'ext': 'mp4',
'title': 'Die Sendung vom 28.11.2017',
'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
'timestamp': 1511942766,
'upload_date': '20171129',
}
}, {
'url': 'https://www.br.de/mediathek//video/av:61b0db581aed360007558c12',
'only_matching': True,
}]
def _real_extract(self, url):
clip_id = self._match_id(url)
clip = self._download_json(
'https://proxy-base.master.mango.express/graphql',
clip_id, data=json.dumps({
"query": """{
viewer {
clip(id: "%s") {
title
description
duration
createdAt
ageRestriction
videoFiles {
edges {
node {
publicLocation
fileSize
videoProfile {
width
height
bitrate
encoding
}
}
}
}
captionFiles {
edges {
node {
publicLocation
}
}
}
teaserImages {
edges {
node {
imageFiles {
edges {
node {
publicLocation
width
height
}
}
}
}
}
}
}
}
}""" % clip_id}).encode(), headers={
'Content-Type': 'application/json',
})['data']['viewer']['clip']
title = clip['title']
formats = []
for edge in clip.get('videoFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
ext = determine_ext(n_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
n_url, clip_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
else:
video_profile = node.get('videoProfile', {})
tbr = int_or_none(video_profile.get('bitrate'))
format_id = 'http'
if tbr:
format_id += '-%d' % tbr
formats.append({
'format_id': format_id,
'url': n_url,
'width': int_or_none(video_profile.get('width')),
'height': int_or_none(video_profile.get('height')),
'tbr': tbr,
'filesize': int_or_none(node.get('fileSize')),
})
subtitles = {}
for edge in clip.get('captionFiles', {}).get('edges', []):
node = edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
subtitles.setdefault('de', []).append({
'url': n_url,
})
thumbnails = []
for edge in clip.get('teaserImages', {}).get('edges', []):
for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []):
node = image_edge.get('node', {})
n_url = node.get('publicLocation')
if not n_url:
continue
thumbnails.append({
'url': n_url,
'width': int_or_none(node.get('width')),
'height': int_or_none(node.get('height')),
})
return {
'id': clip_id,
'title': title,
'description': clip.get('description'),
'duration': int_or_none(clip.get('duration')),
'timestamp': parse_iso8601(clip.get('createdAt')),
'age_limit': int_or_none(clip.get('ageRestriction')),
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
}

View file

@ -1,86 +0,0 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
int_or_none,
url_or_none,
)
class BreakIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
_TESTS = [{
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
'info_dict': {
'id': '2468056',
'ext': 'mp4',
'title': 'When Girls Act Like D-Bags',
'age_limit': 13,
},
}, {
# youtube embed
'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
'info_dict': {
'id': 'RrrDLdeL2HQ',
'ext': 'mp4',
'title': 'Whale Watching Boat Crashing Into San Diego Dock',
'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
'upload_date': '20160331',
'uploader': 'Steve Holden',
'uploader_id': 'sdholden07',
},
'params': {
'skip_download': True,
}
}, {
'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
'only_matching': True,
}]
def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id)
youtube_url = YoutubeIE._extract_url(webpage)
if youtube_url:
return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
content = self._parse_json(
self._search_regex(
r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
'content'),
display_id)
formats = []
for video in content:
video_url = url_or_none(video.get('url'))
if not video_url:
continue
bitrate = int_or_none(self._search_regex(
r'(\d+)_kbps', video_url, 'tbr', default=None))
formats.append({
'url': video_url,
'format_id': 'http-%d' % bitrate if bitrate else 'http',
'tbr': bitrate,
})
title = self._search_regex(
(r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')
def get(key, name):
return int_or_none(self._search_regex(
r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
default=None))
age_limit = get('ratings', 'age limit')
video_id = video_id or get('pid', 'video id') or display_id
return {
'id': video_id,
'display_id': display_id,
'title': title,
'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': age_limit,
'formats': formats,
}

View file

@ -21,10 +21,10 @@ def _initialize_pre_login(self):
def _get_logged_in_username(self, url, video_id):
webpage, urlh = self._download_webpage_handle(url, video_id)
if self._LOGIN_API == urlh.url:
if urlh.url.startswith(self._LOGIN_API):
self.raise_login_required()
return self._html_search_regex(
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username')
def _perform_login(self, username, password):
login_form = self._hidden_inputs(self._download_webpage(

View file

@ -0,0 +1,123 @@
import re
from functools import partial
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
bug_reports_message,
clean_html,
format_field,
get_element_text_and_html_by_tag,
int_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class BundestagIE(InfoExtractor):
_VALID_URL = [
r'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
r'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://dbtg.tv/cvid/7605304',
'info_dict': {
'id': '7605304',
'ext': 'mp4',
'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
'description': 'md5:321a9dc6bdad201264c0045efc371561',
},
}, {
'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
'info_dict': {
'id': '7602120',
'ext': 'mp4',
'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
'description': 'Befragung der Bundesregierung',
},
}, {
'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
'only_matching': True,
}, {
'url': 'http://dbtg.tv/fvid/3594346',
'only_matching': True,
}]
_OVERLAY_URL = 'https://www.bundestag.de/mediathekoverlay'
_INSTANCE_FORMAT = 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'
_SHARE_URL = 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
_SHARE_AUDIO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
_SHARE_VIDEO_REGEX = r'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'
def _bt_extract_share_formats(self, video_id):
share_data = self._download_json(
f'{self._SHARE_URL}{video_id}', video_id, note='Downloading share format JSON')
if traverse_obj(share_data, ('status', 'code', {int})) != 1:
self.report_warning(format_field(
share_data, [('status', 'message', {str})],
'Share API response: %s', default='Unknown Share API Error')
+ bug_reports_message())
return
for name, url in share_data.items():
if not isinstance(name, str) or not url_or_none(url):
continue
elif name.startswith('audio'):
match = re.search(self._SHARE_AUDIO_REGEX, url)
yield {
'format_id': name,
'url': url,
'vcodec': 'none',
**traverse_obj(match, {
'acodec': 'codec',
'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get}),
'abr': ('bitrate', {int_or_none}),
'ext': 'ext',
}),
}
elif name.startswith('download'):
match = re.search(self._SHARE_VIDEO_REGEX, url)
yield {
'format_id': name,
'url': url,
**traverse_obj(match, {
'vcodec': 'codec',
'tbr': ('bitrate', {int_or_none}),
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
'ext': 'ext',
}),
}
def _real_extract(self, url):
video_id = self._match_id(url)
formats = []
result = {'id': video_id, 'formats': formats}
try:
formats.extend(self._extract_m3u8_formats(
self._INSTANCE_FORMAT.format(video_id), video_id, m3u8_id='instance'))
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 404:
raise ExtractorError('Could not find video id', expected=True)
self.report_warning(f'Error extracting hls formats: {error}', video_id)
formats.extend(self._bt_extract_share_formats(video_id))
if not formats:
self.raise_no_formats('Could not find suitable formats', video_id=video_id)
result.update(traverse_obj(self._download_webpage(
self._OVERLAY_URL, video_id,
query={'videoid': video_id, 'view': 'main'},
note='Downloading metadata overlay', fatal=False,
), {
'title': (
{partial(get_element_text_and_html_by_tag, 'h3')}, 0,
{partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
}))
return result

View file

@ -8,9 +8,9 @@
class BYUtvIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
# ooyalaVOD
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
@ -24,7 +24,6 @@ class BYUtvIE(InfoExtractor):
'params': {
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
# dvr
'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
@ -63,19 +62,6 @@ def _real_extract(self, url):
'x-byutv-platformkey': 'xsaaw9c7y5',
})
ep = video.get('ooyalaVOD')
if ep:
return {
'_type': 'url_transparent',
'ie_key': 'Ooyala',
'url': 'ooyala:%s' % ep['providerId'],
'id': video_id,
'display_id': display_id,
'title': ep.get('title'),
'description': ep.get('description'),
'thumbnail': ep.get('imageThumbnail'),
}
info = {}
formats = []
subtitles = {}

View file

@ -1,87 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
unified_strdate,
)
class CamWithHerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'
_TESTS = [{
'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
'info_dict': {
'id': '5644',
'ext': 'flv',
'title': 'Periscope Tease',
'description': 'In the clouds teasing on periscope to my favorite song',
'duration': 240,
'view_count': int,
'comment_count': int,
'uploader': 'MileenaK',
'upload_date': '20160322',
'age_limit': 18,
},
'params': {
'skip_download': True,
}
}, {
'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
'only_matching': True,
}, {
'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
'only_matching': True,
}, {
'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
flv_id = self._html_search_regex(
r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')
# Video URL construction algorithm is reverse-engineered from cwhplayer.swf
rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % (
('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id)
title = self._html_search_regex(
r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
description = self._html_search_regex(
r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)
runtime = self._search_regex(
r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
if runtime:
runtime = re.sub(r'[\s-]', '', runtime)
duration = parse_duration(runtime)
view_count = int_or_none(self._search_regex(
r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
comment_count = int_or_none(self._search_regex(
r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
uploader = self._search_regex(
r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
upload_date = unified_strdate(self._search_regex(
r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))
return {
'id': flv_id,
'url': rtmp_url,
'ext': 'flv',
'no_resume': True,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'comment_count': comment_count,
'uploader': uploader,
'upload_date': upload_date,
'age_limit': 18
}

View file

@ -1,105 +0,0 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
format_field,
float_or_none,
int_or_none,
try_get,
)
from .videomore import VideomoreIE
class CarambaTVIE(InfoExtractor):
_VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://video1.carambatv.ru/v/191910501',
'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
'info_dict': {
'id': '191910501',
'ext': 'mp4',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2678.31,
},
}, {
'url': 'carambatv:191910501',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
video_id)
title = video['title']
base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id
formats = [{
'url': base_url + f['fn'],
'height': int_or_none(f.get('height')),
'format_id': format_field(f, 'height', '%sp'),
} for f in video['qualities'] if f.get('fn')]
thumbnail = video.get('splash')
duration = float_or_none(try_get(
video, lambda x: x['annotations'][0]['end_time'], compat_str))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
class CarambaTVPageIE(InfoExtractor):
_VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
'md5': 'a49fb0ec2ad66503eeb46aac237d3c86',
'info_dict': {
'id': '475222',
'ext': 'flv',
'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
'thumbnail': r're:^https?://.*\.jpg',
# duration reported by videomore is incorrect
'duration': int,
},
'add_ie': [VideomoreIE.ie_key()],
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
videomore_url = VideomoreIE._extract_url(webpage)
if not videomore_url:
videomore_id = self._search_regex(
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
default=None)
if videomore_id:
videomore_url = 'videomore:%s' % videomore_id
if videomore_url:
title = self._og_search_title(webpage)
return {
'_type': 'url_transparent',
'url': videomore_url,
'ie_key': VideomoreIE.ie_key(),
'title': title,
}
video_url = self._og_search_property('video:iframe', webpage, default=None)
if not video_url:
video_id = self._search_regex(
r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
webpage, 'video id')
video_url = 'carambatv:%s' % video_id
return self.url_result(video_url, CarambaTVIE.ie_key())

View file

@ -1,8 +1,9 @@
import re
import json
import base64
import json
import re
import time
import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
from ..compat import (
@ -179,6 +180,13 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [],
'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'],
'tags': 'count:8',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
},
}, {
'url': 'http://www.cbc.ca/player/play/2164402062',
@ -194,25 +202,37 @@ class CBCPlayerIE(InfoExtractor):
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'],
'location': 'Windsor',
'tags': ['cancer'],
'creator': 'Allison Johnson',
'media_type': 'Excerpt',
},
}, {
# Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'http://www.cbc.ca/player/play/2249992771553',
'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd',
'url': 'http://www.cbc.ca/player/play/2284799043667',
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
'info_dict': {
'id': '2249992771553',
'id': '2284799043667',
'ext': 'mp4',
'title': 'The National | Womens soccer pay, Florida seawater, Swift quake',
'description': 'md5:adba28011a56cfa47a080ff198dad27a',
'timestamp': 1690596000,
'duration': 2716.333,
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
'timestamp': 1700272800,
'duration': 2718.833,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
'uploader': 'CBCC-NEW',
'chapters': 'count:5',
'upload_date': '20230729',
'upload_date': '20231118',
'categories': 'count:4',
'series': 'The National - Full Show',
'tags': 'count:1',
'creator': 'News',
'location': 'Canada',
'media_type': 'Full Program',
},
}]
@ -387,7 +407,7 @@ def _find_secret_formats(self, formats, video_id):
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
if not secret_xml:
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
return
for child in secret_xml:

View file

@ -1,252 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
parse_iso8601,
qualities,
unescapeHTML,
)
class Channel9IE(InfoExtractor):
IE_DESC = 'Channel 9'
IE_NAME = 'channel9'
_VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
_EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b']
_TESTS = [{
'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
'md5': '32083d4eaf1946db6d454313f44510ca',
'info_dict': {
'id': '6c413323-383a-49dc-88f9-a22800cab024',
'ext': 'wmv',
'title': 'Developer Kick-Off Session: Stuff We Love',
'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
'duration': 4576,
'thumbnail': r're:https?://.*\.jpg',
'timestamp': 1377717420,
'upload_date': '20130828',
'session_code': 'KOS002',
'session_room': 'Arena 1A',
'session_speakers': 'count:5',
},
}, {
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
'info_dict': {
'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
'ext': 'wmv',
'title': 'Self-service BI with Power BI - nuclear testing',
'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
'duration': 1540,
'thumbnail': r're:https?://.*\.jpg',
'timestamp': 1386381991,
'upload_date': '20131207',
'authors': ['Mike Wilmot'],
},
}, {
# low quality mp4 is best
'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
'info_dict': {
'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
'ext': 'mp4',
'title': 'Ranges for the Standard Library',
'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
'duration': 5646,
'thumbnail': r're:https?://.*\.jpg',
'upload_date': '20150930',
'timestamp': 1443640735,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
'info_dict': {
'id': 'Events/DEVintersection/DEVintersection-2016',
'title': 'DEVintersection 2016 Orlando Sessions',
},
'playlist_mincount': 14,
}, {
'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
'only_matching': True,
}, {
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
'only_matching': True,
}]
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
def _extract_list(self, video_id, rss_url=None):
if not rss_url:
rss_url = self._RSS_URL % video_id
rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
entries = [self.url_result(session_url.text, 'Channel9')
for session_url in rss.findall('./channel/item/link')]
title_text = rss.find('./channel/title').text
return self.playlist_result(entries, video_id, title_text)
def _real_extract(self, url):
content_path, rss = self._match_valid_url(url).groups()
if rss:
return self._extract_list(content_path, url)
webpage = self._download_webpage(
url, content_path, 'Downloading web page')
episode_data = self._search_regex(
r"data-episode='([^']+)'", webpage, 'episode data', default=None)
if episode_data:
episode_data = self._parse_json(unescapeHTML(
episode_data), content_path)
content_id = episode_data['contentId']
is_session = '/Sessions(' in episode_data['api']
content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
if is_session:
content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
else:
content_url += 'Authors,Body&$expand=Authors'
content_data = self._download_json(content_url, content_id)
title = content_data['Title']
QUALITIES = (
'mp3',
'wmv', 'mp4',
'wmv-low', 'mp4-low',
'wmv-mid', 'mp4-mid',
'wmv-high', 'mp4-high',
)
quality_key = qualities(QUALITIES)
def quality(quality_id, format_url):
return (len(QUALITIES) if '_Source.' in format_url
else quality_key(quality_id))
formats = []
urls = set()
SITE_QUALITIES = {
'MP3': 'mp3',
'MP4': 'mp4',
'Low Quality WMV': 'wmv-low',
'Low Quality MP4': 'mp4-low',
'Mid Quality WMV': 'wmv-mid',
'Mid Quality MP4': 'mp4-mid',
'High Quality WMV': 'wmv-high',
'High Quality MP4': 'mp4-high',
}
formats_select = self._search_regex(
r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
'formats select', default=None)
if formats_select:
for mobj in re.finditer(
r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
formats_select):
format_url = mobj.group('url')
if format_url in urls:
continue
urls.add(format_url)
format_id = mobj.group('format')
quality_id = SITE_QUALITIES.get(format_id, format_id)
formats.append({
'url': format_url,
'format_id': quality_id,
'quality': quality(quality_id, format_url),
'vcodec': 'none' if quality_id == 'mp3' else None,
})
API_QUALITIES = {
'VideoMP4Low': 'mp4-low',
'VideoWMV': 'wmv-mid',
'VideoMP4Medium': 'mp4-mid',
'VideoMP4High': 'mp4-high',
'VideoWMVHQ': 'wmv-hq',
}
for format_id, q in API_QUALITIES.items():
q_url = content_data.get(format_id)
if not q_url or q_url in urls:
continue
urls.add(q_url)
formats.append({
'url': q_url,
'format_id': q,
'quality': quality(q, q_url),
})
slides = content_data.get('Slides')
zip_file = content_data.get('ZipFile')
if not formats and not slides and not zip_file:
self.raise_no_formats(
'None of recording, slides or zip are available for %s' % content_path)
subtitles = {}
for caption in content_data.get('Captions', []):
caption_url = caption.get('Url')
if not caption_url:
continue
subtitles.setdefault(caption.get('Language', 'en'), []).append({
'url': caption_url,
'ext': 'vtt',
})
common = {
'id': content_id,
'title': title,
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
'thumbnail': content_data.get('VideoPlayerPreviewImage'),
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
'avg_rating': int_or_none(content_data.get('Rating')),
'rating_count': int_or_none(content_data.get('RatingCount')),
'view_count': int_or_none(content_data.get('Views')),
'comment_count': int_or_none(content_data.get('CommentCount')),
'subtitles': subtitles,
}
if is_session:
speakers = []
for s in content_data.get('Speakers', []):
speaker_name = s.get('FullName')
if not speaker_name:
continue
speakers.append(speaker_name)
common.update({
'session_code': content_data.get('Code'),
'session_room': content_data.get('Room'),
'session_speakers': speakers,
})
else:
authors = []
for a in content_data.get('Authors', []):
author_name = a.get('DisplayName')
if not author_name:
continue
authors.append(author_name)
common['authors'] = authors
contents = []
if slides:
d = common.copy()
d.update({'title': title + '-Slides', 'url': slides})
contents.append(d)
if zip_file:
d = common.copy()
d.update({'title': title + '-Zip', 'url': zip_file})
contents.append(d)
if formats:
d = common.copy()
d.update({'title': title, 'formats': formats})
contents.append(d)
return self.playlist_result(contents)
else:
return self._extract_list(content_path)

Some files were not shown because too many files have changed in this diff Show more