Merge branch 'master' into GoogleDriveFolderFix

This may fix the failing ci
This commit is contained in:
grqx 2024-10-02 17:43:36 +13:00
commit b3534df159
117 changed files with 3613 additions and 1161 deletions

View file

@ -77,3 +77,11 @@ body:
render: shell render: shell
validations: validations:
required: true required: true
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -89,3 +89,11 @@ body:
render: shell render: shell
validations: validations:
required: true required: true
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -85,3 +85,11 @@ body:
render: shell render: shell
validations: validations:
required: true required: true
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -70,3 +70,11 @@ body:
render: shell render: shell
validations: validations:
required: true required: true
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -64,3 +64,11 @@ body:
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>
render: shell render: shell
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -70,3 +70,11 @@ body:
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>
render: shell render: shell
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

View file

@ -266,7 +266,7 @@ jobs:
# We need to ignore wheels otherwise we break universal2 builds # We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --no-binary :all: -r requirements.txt python3 -m pip install -U --no-binary :all: -r requirements.txt
# We need to fuse our own universal2 wheels for curl_cffi # We need to fuse our own universal2 wheels for curl_cffi
python3 -m pip install -U delocate python3 -m pip install -U 'delocate==0.11.0'
mkdir curl_cffi_whls curl_cffi_universal2 mkdir curl_cffi_whls curl_cffi_universal2
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
@ -409,7 +409,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.7.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -469,7 +469,7 @@ jobs:
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |

View file

@ -55,6 +55,7 @@ jobs:
- name: Install test requirements - name: Install test requirements
run: python3 ./devscripts/install_deps.py --include test --include curl-cffi run: python3 ./devscripts/install_deps.py --include test --include curl-cffi
- name: Run tests - name: Run tests
timeout-minutes: 15
continue-on-error: False continue-on-error: False
run: | run: |
python3 -m yt_dlp -v || true # Print debug head python3 -m yt_dlp -v || true # Print debug head

21
.github/workflows/issue-lockdown.yml vendored Normal file
View file

@ -0,0 +1,21 @@
name: Issue Lockdown
on:
issues:
types: [opened]
permissions:
issues: write
jobs:
lockdown:
name: Issue Lockdown
if: vars.ISSUE_LOCKDOWN
runs-on: ubuntu-latest
steps:
- name: "Lock new issue"
env:
GH_TOKEN: ${{ github.token }}
ISSUE_NUMBER: ${{ github.event.issue.number }}
REPOSITORY: ${{ github.repository }}
run: |
gh issue lock "${ISSUE_NUMBER}" -R "${REPOSITORY}"

View file

@ -15,8 +15,9 @@ jobs:
with: with:
python-version: '3.8' python-version: '3.8'
- name: Install test requirements - name: Install test requirements
run: python3 ./devscripts/install_deps.py --include test run: python3 ./devscripts/install_deps.py -o --include test
- name: Run tests - name: Run tests
timeout-minutes: 15
run: | run: |
python3 -m yt_dlp -v || true python3 -m yt_dlp -v || true
python3 ./devscripts/run_tests.py core python3 ./devscripts/run_tests.py core

View file

@ -204,7 +204,7 @@ jobs:
git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add -u git add -u
git commit -m "Release ${{ env.version }}" \ git commit -m "Release ${{ env.version }}" \
-m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all"
git push origin --force ${{ github.event.ref }}:release git push origin --force ${{ github.event.ref }}:release
- name: Get target commitish - name: Get target commitish
@ -325,7 +325,7 @@ jobs:
"(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES "(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES
printf '\n\n' >> ./RELEASE_NOTES printf '\n\n' >> ./RELEASE_NOTES
cat >> ./RELEASE_NOTES << EOF cat >> ./RELEASE_NOTES << EOF
#### A description of the various files are in the [README](https://github.com/${{ github.repository }}#release-files) #### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files)
--- ---
$(python ./devscripts/make_changelog.py -vv --collapsible) $(python ./devscripts/make_changelog.py -vv --collapsible)
EOF EOF

17
.github/workflows/sanitize-comment.yml vendored Normal file
View file

@ -0,0 +1,17 @@
name: Sanitize comment
on:
issue_comment:
types: [created, edited]
permissions:
issues: write
jobs:
sanitize-comment:
name: Sanitize comment
if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request
runs-on: ubuntu-latest
steps:
- name: Sanitize comment
uses: yt-dlp/sanitize-comment@v1

View file

@ -653,3 +653,23 @@ LeSuisse
DunnesH DunnesH
iancmy iancmy
mokrueger mokrueger
luvyana
szantnerb
hugepower
scribblemaniac
Codenade
Demon000
Deukhoofd
grqz
hibes
Khaoklong51
kieraneglin
lengzuo
naglis
ndyanx
otovalek
quad
rakslice
sahilsinghss73
tony-hn
xingchensong

View file

@ -4,6 +4,150 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2024.09.27
#### Important changes
- **The minimum *recommended* Python version has been raised to 3.9**
Since Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)
#### Core changes
- [Allow `none` arg to negate `--convert-subs` and `--convert-thumbnails`](https://github.com/yt-dlp/yt-dlp/commit/c08e0b20b5edd8957b8318716bc14e896d1b96f4) ([#11066](https://github.com/yt-dlp/yt-dlp/issues/11066)) by [kieraneglin](https://github.com/kieraneglin)
- [Fix format sorting bug with vp9.2 vcodec](https://github.com/yt-dlp/yt-dlp/commit/8f4ea14680c7865d8ffac10a9174205d1d84ada7) ([#10884](https://github.com/yt-dlp/yt-dlp/issues/10884)) by [rakslice](https://github.com/rakslice)
- [Raise minimum recommended Python version to 3.9](https://github.com/yt-dlp/yt-dlp/commit/cca534cd9e6850c70244f225a4a1895ef4bcdbec) ([#11098](https://github.com/yt-dlp/yt-dlp/issues/11098)) by [bashonly](https://github.com/bashonly)
- **cookies**: [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/b397a64691421ace5df09457c2a764821a2dc6f2) ([#11090](https://github.com/yt-dlp/yt-dlp/issues/11090)) by [seproDev](https://github.com/seproDev)
- **utils**: `mimetype2ext`: [Recognize `aacp` as `aac`](https://github.com/yt-dlp/yt-dlp/commit/cc85596d5b59f0c14e9381b3675f619c1e12e597) ([#10860](https://github.com/yt-dlp/yt-dlp/issues/10860)) by [bashonly](https://github.com/bashonly)
#### Extractor changes
- [Fix JW Player format parsing](https://github.com/yt-dlp/yt-dlp/commit/409f8e9e3b4bde81ef76fc563256f876d2ff8099) ([#10956](https://github.com/yt-dlp/yt-dlp/issues/10956)) by [seproDev](https://github.com/seproDev)
- [Handle decode errors when reading responses](https://github.com/yt-dlp/yt-dlp/commit/325001317d97f4545d66fac44c4ba772c6f45f22) ([#10868](https://github.com/yt-dlp/yt-dlp/issues/10868)) by [bashonly](https://github.com/bashonly)
- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7f909046f4dc0fba472b4963145aef6e0d42491b) ([#11101](https://github.com/yt-dlp/yt-dlp/issues/11101)) by [bashonly](https://github.com/bashonly)
- **adn**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/cc88a54bb1ef285154775f8a6a413335ce4c71ce) ([#10749](https://github.com/yt-dlp/yt-dlp/issues/10749)) by [infanf](https://github.com/infanf)
- **asobistage**: [Support redirected URLs](https://github.com/yt-dlp/yt-dlp/commit/a7d3235c84dac57a127cbe0ff38f7f7c2fdd8fa0) ([#10768](https://github.com/yt-dlp/yt-dlp/issues/10768)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **bandcamp**: user: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0176547f16a3642cd71627126e9dfc24981e20) ([#10328](https://github.com/yt-dlp/yt-dlp/issues/10328)) by [bashonly](https://github.com/bashonly), [quad](https://github.com/quad)
- **beacon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b4760c778d0c92c6e3f2bc8346cd72c8f08595ae) ([#9901](https://github.com/yt-dlp/yt-dlp/issues/9901)) by [Deukhoofd](https://github.com/Deukhoofd)
- **bilibili**
- [Fix chapters and subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/a2000bc85730c950351d78bb818493dc39dca3cb) ([#11099](https://github.com/yt-dlp/yt-dlp/issues/11099)) by [bashonly](https://github.com/bashonly)
- [Fix festival URL support](https://github.com/yt-dlp/yt-dlp/commit/b43bd864851f2862e26caa85461c5d825d49d463) ([#10740](https://github.com/yt-dlp/yt-dlp/issues/10740)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz)
- **biliintl**: [Fix referer header](https://github.com/yt-dlp/yt-dlp/commit/a06bb586795ebab87a2356923acfc674d6f0e152) ([#11003](https://github.com/yt-dlp/yt-dlp/issues/11003)) by [Khaoklong51](https://github.com/Khaoklong51)
- **dropbox**: [Fix password-protected video support](https://github.com/yt-dlp/yt-dlp/commit/63da31b3b29af90062d8a72a905ffe4b5e499042) ([#10735](https://github.com/yt-dlp/yt-dlp/issues/10735)) by [ndyanx](https://github.com/ndyanx)
- **ertgr**: [Fix video extraction](https://github.com/yt-dlp/yt-dlp/commit/416686ed0cf792ec44ab059f3b229dd776077e14) ([#11091](https://github.com/yt-dlp/yt-dlp/issues/11091)) by [seproDev](https://github.com/seproDev)
- **eurosport**: [Support local URL variants](https://github.com/yt-dlp/yt-dlp/commit/f0bb28504c8c2b75ee3e5796aed50de2a7f90a1b) ([#10785](https://github.com/yt-dlp/yt-dlp/issues/10785)) by [seproDev](https://github.com/seproDev)
- **facebook**
- ads: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d62fef7e07d454c0d2ba2d69fb96d691dba1ded0) ([#10704](https://github.com/yt-dlp/yt-dlp/issues/10704)) by [kclauhk](https://github.com/kclauhk)
- reel: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0e1b941c6b2caa688b0d3332e723d16dbafa4311) by [lengzuo](https://github.com/lengzuo)
- **germanupa**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/124f058b546d652a359c67025bb479789bfbef0b) ([#10538](https://github.com/yt-dlp/yt-dlp/issues/10538)) by [grqz](https://github.com/grqz)
- **hgtvde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a555389c9bb32e589e00b4664974423fb7b04dcd) ([#10992](https://github.com/yt-dlp/yt-dlp/issues/10992)) by [bashonly](https://github.com/bashonly), [rdamas](https://github.com/rdamas)
- **huya**: video: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/25c1cdaa2650563494d3bf00a38f72d0d9486bff) ([#10686](https://github.com/yt-dlp/yt-dlp/issues/10686)) by [hugepower](https://github.com/hugepower)
- **iprima**: [Fix zoom URL support](https://github.com/yt-dlp/yt-dlp/commit/4a27b8f092f7f7c10b7a334d3535c97c2af02f0a) ([#10959](https://github.com/yt-dlp/yt-dlp/issues/10959)) by [otovalek](https://github.com/otovalek)
- **khanacademy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0fba08485b6445b72b5b63ae23ca2a73fa5d967f) ([#10913](https://github.com/yt-dlp/yt-dlp/issues/10913)) by [seproDev](https://github.com/seproDev)
- **kick**
- clips: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/0aa4426e9a35f7f8e184f1f2082b3b313c1448f7) ([#11107](https://github.com/yt-dlp/yt-dlp/issues/11107)) by [bashonly](https://github.com/bashonly)
- vod: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/173d54c151b987409e3eb09552d8d89ed8fc50f7) ([#10988](https://github.com/yt-dlp/yt-dlp/issues/10988)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz)
- **kika**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e6f48ca80821939c1fd11ec2a0cdbf2fba9b258a) ([#5788](https://github.com/yt-dlp/yt-dlp/issues/5788)) by [1100101](https://github.com/1100101)
- **lnkgo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/fa83d0b36bc43d30fe9241c1e923f4614864b758) ([#10904](https://github.com/yt-dlp/yt-dlp/issues/10904)) by [naglis](https://github.com/naglis)
- **loom**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/7509d692b37a7ec6230ea75bfe1e44a8de5eefce) ([#10760](https://github.com/yt-dlp/yt-dlp/issues/10760)) by [kclauhk](https://github.com/kclauhk)
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e2b3634e299be9c16a247ece3b1858d83889c324) ([#11083](https://github.com/yt-dlp/yt-dlp/issues/11083)) by [szantnerb](https://github.com/szantnerb)
- **mojevideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/28b0ecba2af5b4919f198474b3d00a76ef322c31) ([#11019](https://github.com/yt-dlp/yt-dlp/issues/11019)) by [04-pasha-04](https://github.com/04-pasha-04), [pzhlkj6612](https://github.com/pzhlkj6612)
- **niconico**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/eabb4680fdb09ba1f48d174a700a2e3b43f82add) ([#11103](https://github.com/yt-dlp/yt-dlp/issues/11103)) by [bashonly](https://github.com/bashonly)
- **nzz**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a9bc8c3630378bc29f0266126b503f6190c0430) ([#10461](https://github.com/yt-dlp/yt-dlp/issues/10461)) by [1-Byte](https://github.com/1-Byte)
- **patreoncampaign**: [Support API URLs](https://github.com/yt-dlp/yt-dlp/commit/232e6db30c474d1b387e405342f34173ceeaf832) ([#10734](https://github.com/yt-dlp/yt-dlp/issues/10734)) by [bashonly](https://github.com/bashonly), [hibes](https://github.com/hibes)
- **pinterest**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8c078fe28b0ffc15ef9646346c00c592fe71a78) ([#10867](https://github.com/yt-dlp/yt-dlp/issues/10867)) by [bashonly](https://github.com/bashonly), [sahilsinghss73](https://github.com/sahilsinghss73)
- **radiko**: [Extract unique `id` values](https://github.com/yt-dlp/yt-dlp/commit/c8d096c5ce111411fbdbe2abb8fed54f317a6182) ([#10726](https://github.com/yt-dlp/yt-dlp/issues/10726)) by [garret1317](https://github.com/garret1317)
- **rtp**: [Support more subpages](https://github.com/yt-dlp/yt-dlp/commit/d02df303d8e49390599db9f34482697e4d1cf5b2) ([#10787](https://github.com/yt-dlp/yt-dlp/issues/10787)) by [Demon000](https://github.com/Demon000)
- **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ad0b857f459a6d390fbf124183916218c52f223a) ([#11049](https://github.com/yt-dlp/yt-dlp/issues/11049)) by [tony-hn](https://github.com/tony-hn)
- **rutube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/41be32e78c3845000dbac188ffb90ea3ea7c4dfa) ([#10844](https://github.com/yt-dlp/yt-dlp/issues/10844)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **samplefocus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/46f4c80bc363ee8116c33d37f65202e6c3470954) ([#10947](https://github.com/yt-dlp/yt-dlp/issues/10947)) by [seproDev](https://github.com/seproDev)
- **screenrec**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36f9e602ad55679764bc75a4f67f7562b1d6adcf) ([#10917](https://github.com/yt-dlp/yt-dlp/issues/10917)) by [naglis](https://github.com/naglis)
- **sen**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/41a241ca6ffb95b3d9aaf4f42106ca8cba9af1a6) ([#10952](https://github.com/yt-dlp/yt-dlp/issues/10952)) by [seproDev](https://github.com/seproDev)
- **servus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/300c91274f7ea5b1b0528fc5ee11cf1a61d4079e) ([#10944](https://github.com/yt-dlp/yt-dlp/issues/10944)) by [seproDev](https://github.com/seproDev)
- **snapchatspotlight**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b37417e4f934fd8909788b493d017777155b0ae5) ([#11030](https://github.com/yt-dlp/yt-dlp/issues/11030)) by [seproDev](https://github.com/seproDev)
- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a8a05aebb49693e78e1123015837ed5e961ff76) ([#11010](https://github.com/yt-dlp/yt-dlp/issues/11010)) by [diman8](https://github.com/diman8)
- **tenplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d8d473002b654ab0e7b97ead869f58b4361eeae1) ([#10928](https://github.com/yt-dlp/yt-dlp/issues/10928)) by [aarubui](https://github.com/aarubui)
- **tiktok**: [Fix web formats extraction](https://github.com/yt-dlp/yt-dlp/commit/3ad0b7f422d547204df687b6d0b2d9110fff3990) ([#11074](https://github.com/yt-dlp/yt-dlp/issues/11074)) by [bashonly](https://github.com/bashonly)
- **twitter**: spaces: [Support video spaces](https://github.com/yt-dlp/yt-dlp/commit/bef1d4d6fc9493fda7f75e2289c07c507d10092f) ([#10789](https://github.com/yt-dlp/yt-dlp/issues/10789)) by [bashonly](https://github.com/bashonly)
- **vidflex**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e978c312d6550a6ae4c9df18001afb1b420cb72f) ([#10002](https://github.com/yt-dlp/yt-dlp/issues/10002)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **vimeo**
- [Always try to extract original format](https://github.com/yt-dlp/yt-dlp/commit/4115c24d157c5b5f63089d75c4e0f51d1f8b4489) ([#10721](https://github.com/yt-dlp/yt-dlp/issues/10721)) by [bashonly](https://github.com/bashonly) (With fixes in [e8e6a98](https://github.com/yt-dlp/yt-dlp/commit/e8e6a982a1b659eed434d225d7922f632bac6568) by [seproDev](https://github.com/seproDev))
- [Fix HLS audio format sorting](https://github.com/yt-dlp/yt-dlp/commit/a1b4ac2b8ed8e6eaa56044d439f1e0d00c2ba218) ([#11082](https://github.com/yt-dlp/yt-dlp/issues/11082)) by [fireattack](https://github.com/fireattack)
- **watchespn**: [Improve auth support](https://github.com/yt-dlp/yt-dlp/commit/7adff8caf152dcf96d03aff69ed8545c0a63567c) ([#10910](https://github.com/yt-dlp/yt-dlp/issues/10910)) by [ischmidt20](https://github.com/ischmidt20)
- **wistia**: [Support password-protected videos](https://github.com/yt-dlp/yt-dlp/commit/9f5c9a90898c5a1e672922d9cd799716c73cee34) ([#11100](https://github.com/yt-dlp/yt-dlp/issues/11100)) by [bashonly](https://github.com/bashonly)
- **ximalaya**: [Add VIP support](https://github.com/yt-dlp/yt-dlp/commit/3dfd720d098b4d49d69cfc77e6376f22bcd90934) ([#10832](https://github.com/yt-dlp/yt-dlp/issues/10832)) by [seproDev](https://github.com/seproDev), [xingchensong](https://github.com/xingchensong)
- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3aa0156e05662923d130ddbc1c82596e38c01a00) ([#10950](https://github.com/yt-dlp/yt-dlp/issues/10950)) by [seproDev](https://github.com/seproDev)
- **yleareena**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/48d629d461e05b1b19f5e53dc959bb9ebe95da42) ([#11104](https://github.com/yt-dlp/yt-dlp/issues/11104)) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Add `po_token`, `visitor_data`, `data_sync_id` extractor args](https://github.com/yt-dlp/yt-dlp/commit/3a3bd00037e9908e87da4fa9f2ad772aa34dc60e) ([#10648](https://github.com/yt-dlp/yt-dlp/issues/10648)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [seproDev](https://github.com/seproDev) (With fixes in [fa2be9a](https://github.com/yt-dlp/yt-dlp/commit/fa2be9a7c63babede07480151363e54eee5702bd) by [bashonly](https://github.com/bashonly))
- [Support excluding `player_client`s in extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/49f3741a820ed142f6866317c2e7d247b130960e) ([#10710](https://github.com/yt-dlp/yt-dlp/issues/10710)) by [bashonly](https://github.com/bashonly)
- clip: [Prioritize `https` formats](https://github.com/yt-dlp/yt-dlp/commit/1d84b780cf33a1d84756825ac23f990a905703df) ([#11102](https://github.com/yt-dlp/yt-dlp/issues/11102)) by [bashonly](https://github.com/bashonly)
- tab: [Fix shorts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/9431777b4c37129a6093080c77ca59960afbb9d7) ([#10938](https://github.com/yt-dlp/yt-dlp/issues/10938)) by [seproDev](https://github.com/seproDev)
#### Networking changes
- [Fix handler not being added to RequestError](https://github.com/yt-dlp/yt-dlp/commit/d1c4d88b2d912e8da5e76db455562ca63b1af690) ([#10955](https://github.com/yt-dlp/yt-dlp/issues/10955)) by [coletdjnz](https://github.com/coletdjnz)
- [Pin `curl-cffi` version to < 0.7.2](https://github.com/yt-dlp/yt-dlp/commit/5bb1aa04dafce13ba9de707ea53169fab58b5207) ([#11092](https://github.com/yt-dlp/yt-dlp/issues/11092)) by [bashonly](https://github.com/bashonly)
- **Request Handler**: websockets: [Upgrade websockets to 13.0](https://github.com/yt-dlp/yt-dlp/commit/6f9e6537434562d513d0c9b68ced8a61ade94a64) ([#10815](https://github.com/yt-dlp/yt-dlp/issues/10815)) by [coletdjnz](https://github.com/coletdjnz)
#### Misc. changes
- **build**
- [Bump PyInstaller version pin to `>=6.10.0`](https://github.com/yt-dlp/yt-dlp/commit/fb8b7f226d251e521a89b23c415e249e5b788e5c) ([#10709](https://github.com/yt-dlp/yt-dlp/issues/10709)) by [bashonly](https://github.com/bashonly)
- [Pin `delocate` version for `macos`](https://github.com/yt-dlp/yt-dlp/commit/7e41628ff523b3fe373b0981a5db441358980dab) ([#10901](https://github.com/yt-dlp/yt-dlp/issues/10901)) by [bashonly](https://github.com/bashonly)
- **ci**
- [Add comment sanitization workflow](https://github.com/yt-dlp/yt-dlp/commit/b6200bdcf3a9415ae36859188f9a57e3e461c696) ([#10915](https://github.com/yt-dlp/yt-dlp/issues/10915)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Add issue tracker anti-spam protection](https://github.com/yt-dlp/yt-dlp/commit/ad9a8115aa29a1a95c961b16fcf129a228d98f50) ([#10861](https://github.com/yt-dlp/yt-dlp/issues/10861)) by [bashonly](https://github.com/bashonly)
- **cleanup**: Miscellaneous: [c6387ab](https://github.com/yt-dlp/yt-dlp/commit/c6387abc1af9842bb0541288a5610abba9b1ab51) by [bashonly](https://github.com/bashonly), [Codenade](https://github.com/Codenade), [coletdjnz](https://github.com/coletdjnz), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [pzhlkj6612](https://github.com/pzhlkj6612), [seproDev](https://github.com/seproDev)
### 2024.08.06
#### Core changes
- **jsinterp**: [Improve `slice` implementation](https://github.com/yt-dlp/yt-dlp/commit/bb8bf1db993f59752d20b73b861bd55e40cf0e31) ([#10664](https://github.com/yt-dlp/yt-dlp/issues/10664)) by [seproDev](https://github.com/seproDev)
#### Extractor changes
- **discoveryplusitaly**: [Support sport and olympics URLs](https://github.com/yt-dlp/yt-dlp/commit/e7d73bc4531ee3f91a46b15e218dcc1fbeb6226c) ([#10655](https://github.com/yt-dlp/yt-dlp/issues/10655)) by [bashonly](https://github.com/bashonly)
- **gem.cbc.ca**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc5eecfa31c9571b6031cc3968aaa0394be55d7a) ([#10565](https://github.com/yt-dlp/yt-dlp/issues/10565)) by [bashonly](https://github.com/bashonly), [scribblemaniac](https://github.com/scribblemaniac)
- **niconico**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4d9231208332d4c32364b8cd814bff8b20232cae) ([#10677](https://github.com/yt-dlp/yt-dlp/issues/10677)) by [bashonly](https://github.com/bashonly)
- **olympics**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/919540a9644e55deb78cdd6751757ec8fdaf76f4) ([#10625](https://github.com/yt-dlp/yt-dlp/issues/10625)) by [bashonly](https://github.com/bashonly)
- **youku**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0088c6de23d832b117061a33e984dc452d992e9c) ([#10626](https://github.com/yt-dlp/yt-dlp/issues/10626)) by [hugepower](https://github.com/hugepower)
- **youtube**
- [Change default player clients to `ios,web_creator`](https://github.com/yt-dlp/yt-dlp/commit/406f4c2e47502fffc1b0c210b4ee6487c89a44cb) ([#10674](https://github.com/yt-dlp/yt-dlp/issues/10674)) by [bashonly](https://github.com/bashonly)
- [Fix `n` function name extraction for player `b12cc44b`](https://github.com/yt-dlp/yt-dlp/commit/c86891eb9434b4d7eec426d38c0c625b5e13cb2f) ([#10668](https://github.com/yt-dlp/yt-dlp/issues/10668)) by [seproDev](https://github.com/seproDev)
### 2024.08.01
#### Core changes
- **utils**: `unified_timestamp`: [Recognize Sunday](https://github.com/yt-dlp/yt-dlp/commit/6daf2c27c0464fba98337be30de0b66d520d0db1) ([#10589](https://github.com/yt-dlp/yt-dlp/issues/10589)) by [bashonly](https://github.com/bashonly)
#### Extractor changes
- **abematv**: [Fix availability extraction](https://github.com/yt-dlp/yt-dlp/commit/ef36d517f9b05785d61abca7691d9ab7d63cc75c) ([#10569](https://github.com/yt-dlp/yt-dlp/issues/10569)) by [middlingphys](https://github.com/middlingphys)
- **cbc.ca**: player: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/94a1c5e642e468cebeb51f74c6c220434cb47d96) ([#10302](https://github.com/yt-dlp/yt-dlp/issues/10302)) by [bashonly](https://github.com/bashonly), [trainman261](https://github.com/trainman261)
- **discoveryplus**: [Support olympics URLs](https://github.com/yt-dlp/yt-dlp/commit/0b7728618417e1aa382722a4d29b916b594d4459) ([#10566](https://github.com/yt-dlp/yt-dlp/issues/10566)) by [bashonly](https://github.com/bashonly)
- **kick**: clips: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bb3936ae2b3ce96d0b53f9e17cad1082058f032b) ([#10572](https://github.com/yt-dlp/yt-dlp/issues/10572)) by [luvyana](https://github.com/luvyana)
- **learningonscreen**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/fe15d3178e242803ae7a934b90137f13598eba2e) ([#10590](https://github.com/yt-dlp/yt-dlp/issues/10590)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7e3e4779ad13e4511c9ba3869879e53f0267bd7a) ([#10605](https://github.com/yt-dlp/yt-dlp/issues/10605)) by [szantnerb](https://github.com/szantnerb)
- **mlbtv**: [Fix makeup game extraction](https://github.com/yt-dlp/yt-dlp/commit/4b69e1b53ea21e631cd5dd68ff531e2f1671ec17) ([#10607](https://github.com/yt-dlp/yt-dlp/issues/10607)) by [bashonly](https://github.com/bashonly)
- **olympics**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2f1ddfe12a2c174bc777264c5c8ffe7ca0922d94) ([#10604](https://github.com/yt-dlp/yt-dlp/issues/10604)) by [bashonly](https://github.com/bashonly)
- **tva**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/28d485714fef88937c82635438afba5db81f9089) ([#10567](https://github.com/yt-dlp/yt-dlp/issues/10567)) by [bashonly](https://github.com/bashonly)
- **tver**: [Support olympic URLs](https://github.com/yt-dlp/yt-dlp/commit/5260696b1cba77161828941fdb38f09f14ac6c60) ([#10600](https://github.com/yt-dlp/yt-dlp/issues/10600)) by [vvto33](https://github.com/vvto33)
- **vimeo**: review: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/2b6df93a243bdfb9d6bb5c1e18020625cd02d465) ([#10598](https://github.com/yt-dlp/yt-dlp/issues/10598)) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Change default player clients to `ios,tv`](https://github.com/yt-dlp/yt-dlp/commit/efb42763dec23ccf6a2e3bac3afbfefce8efd012) ([#10457](https://github.com/yt-dlp/yt-dlp/issues/10457)) by [seproDev](https://github.com/seproDev)
- [Fix `n` function name extraction for player `20dfca59`](https://github.com/yt-dlp/yt-dlp/commit/011b4a04db2a636c3ef0a0ad4e2d3ae482c9fd76) ([#10611](https://github.com/yt-dlp/yt-dlp/issues/10611)) by [bashonly](https://github.com/bashonly)
- [Fix age-verification workaround](https://github.com/yt-dlp/yt-dlp/commit/d19fcb934269465fd707e68a87f735ec6983e93d) ([#10610](https://github.com/yt-dlp/yt-dlp/issues/10610)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/0e539617a41913c7da1edd74fb6543c10ad727b3) ([#10573](https://github.com/yt-dlp/yt-dlp/issues/10573)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **cleanup**: Miscellaneous: [ffd7781](https://github.com/yt-dlp/yt-dlp/commit/ffd7781d6588926f820b44a34b9e6e3068fb9f97) by [bashonly](https://github.com/bashonly)
### 2024.07.25
#### Extractor changes
- **abematv**: [Adapt key retrieval to request handler framework](https://github.com/yt-dlp/yt-dlp/commit/a3bab4752a2b3d56e5a59b4e0411bb8f695c010b) ([#10491](https://github.com/yt-dlp/yt-dlp/issues/10491)) by [bashonly](https://github.com/bashonly)
- **facebook**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1a34a802f44a1dab8f642c79c3cc810e21541d3b) ([#10531](https://github.com/yt-dlp/yt-dlp/issues/10531)) by [bashonly](https://github.com/bashonly)
- **mlbtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f0993391e6052ec8f7aacc286609564f226943b9) ([#10515](https://github.com/yt-dlp/yt-dlp/issues/10515)) by [bashonly](https://github.com/bashonly)
- **tiktok**: [Fix and deprioritize JSON subtitles](https://github.com/yt-dlp/yt-dlp/commit/2f97779f335ac069ecccd9c7bf81abf4a83cfe7a) ([#10516](https://github.com/yt-dlp/yt-dlp/issues/10516)) by [bashonly](https://github.com/bashonly)
- **vimeo**: [Fix chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a0a1bc3d8d8e3bb9a48a06e835815a0460e90e77) ([#10544](https://github.com/yt-dlp/yt-dlp/issues/10544)) by [bashonly](https://github.com/bashonly)
- **youtube**: [Fix `n` function name extraction for player `3400486c`](https://github.com/yt-dlp/yt-dlp/commit/713b4cd18f00556771af8cfdd9cea6cc1a09e948) ([#10542](https://github.com/yt-dlp/yt-dlp/issues/10542)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **build**: [Pin `setuptools` version](https://github.com/yt-dlp/yt-dlp/commit/e046db8a116b1c320d4785daadd48ea0b22a3987) ([#10493](https://github.com/yt-dlp/yt-dlp/issues/10493)) by [bashonly](https://github.com/bashonly)
### 2024.07.16 ### 2024.07.16
#### Core changes #### Core changes

View file

@ -200,7 +200,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) * [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE)
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
* Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds * Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
@ -459,17 +459,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
conditions. Use a "\" to escape "&" or conditions. Use a "\" to escape "&" or
quotes if needed. If used multiple times, quotes if needed. If used multiple times,
the filter matches if at least one of the the filter matches if at least one of the
conditions is met. E.g. --match-filter conditions is met. E.g. --match-filters
!is_live --match-filter "like_count>?100 & !is_live --match-filters "like_count>?100 &
description~='(?i)\bcats \& dogs\b'" matches description~='(?i)\bcats \& dogs\b'" matches
only videos that are not live OR those that only videos that are not live OR those that
have a like count more than 100 (or the like have a like count more than 100 (or the like
field is not available) and also has a field is not available) and also has a
description that contains the phrase "cats & description that contains the phrase "cats &
dogs" (caseless). Use "--match-filter -" to dogs" (caseless). Use "--match-filters -" to
interactively ask whether to download each interactively ask whether to download each
video video
--no-match-filters Do not use any --match-filter (default) --no-match-filters Do not use any --match-filters (default)
--break-match-filters FILTER Same as "--match-filters" but stops the --break-match-filters FILTER Same as "--match-filters" but stops the
download process when a video is rejected download process when a video is rejected
--no-break-match-filters Do not use any --break-match-filters (default) --no-break-match-filters Do not use any --break-match-filters (default)
@ -490,7 +490,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
encountering a file that is in the archive encountering a file that is in the archive
(default) (default)
--break-per-input Alters --max-downloads, --break-on-existing, --break-per-input Alters --max-downloads, --break-on-existing,
--break-match-filter, and autonumber to --break-match-filters, and autonumber to
reset per input URL reset per input URL
--no-break-per-input --break-on-existing and similar options --no-break-per-input --break-on-existing and similar options
terminates the entire download queue terminates the entire download queue
@ -999,12 +999,16 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
be used multiple times be used multiple times
--no-exec Remove any previously defined --exec --no-exec Remove any previously defined --exec
--convert-subs FORMAT Convert the subtitles to another format --convert-subs FORMAT Convert the subtitles to another format
(currently supported: ass, lrc, srt, vtt) (currently supported: ass, lrc, srt, vtt).
(Alias: --convert-subtitles) Use "--convert-subs none" to disable
conversion (default) (Alias: --convert-
subtitles)
--convert-thumbnails FORMAT Convert the thumbnails to another format --convert-thumbnails FORMAT Convert the thumbnails to another format
(currently supported: jpg, png, webp). You (currently supported: jpg, png, webp). You
can specify multiple rules using similar can specify multiple rules using similar
syntax as --remux-video syntax as "--remux-video". Use "--convert-
thumbnails none" to disable conversion
(default)
--split-chapters Split video into multiple files based on --split-chapters Split video into multiple files based on
internal chapters. The "chapter:" prefix can internal chapters. The "chapter:" prefix can
be used with "--paths" and "--output" to set be used with "--paths" and "--output" to set
@ -1758,7 +1762,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
# EXTRACTOR ARGUMENTS # EXTRACTOR ARGUMENTS
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"` Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
@ -1767,7 +1771,7 @@ The following extractors use this feature:
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mediaconnect`, `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. * `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@ -1775,8 +1779,11 @@ The following extractors use this feature:
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests * `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
* `po_token`: Proof of Origin (PO) Token(s) to use for requesting video playback. Comma separated list of PO Tokens in the format `CLIENT+PO_TOKEN`, e.g. `youtube:po_token=web+XXX,android+YYY`
#### youtubetab (YouTube playlists, channels, feeds, etc.) #### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
@ -2177,9 +2184,9 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` * **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc * **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc
* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc * **Improvements**: Regex and other operators in `--format`/`--match-filters`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details * **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details
@ -2220,7 +2227,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi`
* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior
* ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~ * ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~
* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this * yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filters` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values * yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
* The sub-modules `swfinterp`, `casefold` are removed. * The sub-modules `swfinterp`, `casefold` are removed.
@ -2266,11 +2273,11 @@ While these options are redundant, they are still expected to be used due to the
--get-thumbnail --print thumbnail --get-thumbnail --print thumbnail
-e, --get-title --print title -e, --get-title --print title
-g, --get-url --print urls -g, --get-url --print urls
--match-title REGEX --match-filter "title ~= (?i)REGEX" --match-title REGEX --match-filters "title ~= (?i)REGEX"
--reject-title REGEX --match-filter "title !~= (?i)REGEX" --reject-title REGEX --match-filters "title !~= (?i)REGEX"
--min-views COUNT --match-filter "view_count >=? COUNT" --min-views COUNT --match-filters "view_count >=? COUNT"
--max-views COUNT --match-filter "view_count <=? COUNT" --max-views COUNT --match-filters "view_count <=? COUNT"
--break-on-reject Use --break-match-filter --break-on-reject Use --break-match-filters
--user-agent UA --add-header "User-Agent:UA" --user-agent UA --add-header "User-Agent:UA"
--referer URL --add-header "Referer:URL" --referer URL --add-header "Referer:URL"
--playlist-start NUMBER -I NUMBER: --playlist-start NUMBER -I NUMBER:

View file

@ -185,5 +185,10 @@
"action": "add", "action": "add",
"when": "6075a029dba70a89675ae1250e7cdfd91f0eba41", "when": "6075a029dba70a89675ae1250e7cdfd91f0eba41",
"short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors" "short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors"
},
{
"action": "add",
"when": "fb8b7f226d251e521a89b23c415e249e5b788e5c",
"short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
} }
] ]

View file

@ -46,6 +46,14 @@ VERBOSE_TMPL = '''
render: shell render: shell
validations: validations:
required: true required: true
- type: markdown
attributes:
value: |
> [!CAUTION]
> ### GitHub is experiencing a high volume of malicious spam comments.
> ### If you receive any replies asking you to download a file, do NOT follow the download links!
>
> Note that this issue may be temporarily locked as an anti-spam measure after it is opened.
'''.strip() '''.strip()
NO_SKIP = ''' NO_SKIP = '''

View file

@ -49,14 +49,14 @@ dependencies = [
"pycryptodomex", "pycryptodomex",
"requests>=2.32.2,<3", "requests>=2.32.2,<3",
"urllib3>=1.26.17,<3", "urllib3>=1.26.17,<3",
"websockets>=12.0", "websockets>=13.0",
] ]
[project.optional-dependencies] [project.optional-dependencies]
default = [] default = []
curl-cffi = [ curl-cffi = [
"curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
"curl-cffi>=0.5.10,!=0.6.*,<0.8; os_name!='nt' and implementation_name=='cpython'", "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'",
] ]
secretstorage = [ secretstorage = [
"cffi", "cffi",
@ -66,7 +66,7 @@ build = [
"build", "build",
"hatchling", "hatchling",
"pip", "pip",
"setuptools", "setuptools>=71.0.2", # 71.0.0 broke pyinstaller
"wheel", "wheel",
] ]
dev = [ dev = [
@ -76,13 +76,13 @@ dev = [
] ]
static-analysis = [ static-analysis = [
"autopep8~=2.0", "autopep8~=2.0",
"ruff~=0.5.0", "ruff~=0.6.0",
] ]
test = [ test = [
"pytest~=8.1", "pytest~=8.1",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.7.0", # for compat with setuptools>=70 "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0
] ]
py2exe = [ py2exe = [
"py2exe>=0.12", "py2exe>=0.12",

View file

@ -143,6 +143,7 @@
- **BBVTV**: [*bbvtv*](## "netrc machine") - **BBVTV**: [*bbvtv*](## "netrc machine")
- **BBVTVLive**: [*bbvtv*](## "netrc machine") - **BBVTVLive**: [*bbvtv*](## "netrc machine")
- **BBVTVRecordings**: [*bbvtv*](## "netrc machine") - **BBVTVRecordings**: [*bbvtv*](## "netrc machine")
- **BeaconTv**
- **BeatBumpPlaylist** - **BeatBumpPlaylist**
- **BeatBumpVideo** - **BeatBumpVideo**
- **Beatport** - **Beatport**
@ -505,6 +506,7 @@
- **gem.cbc.ca:playlist** - **gem.cbc.ca:playlist**
- **Genius** - **Genius**
- **GeniusLyrics** - **GeniusLyrics**
- **Germanupa**: germanupa.de
- **GetCourseRu**: [*getcourseru*](## "netrc machine") - **GetCourseRu**: [*getcourseru*](## "netrc machine")
- **GetCourseRuPlayer** - **GetCourseRuPlayer**
- **Gettr** - **Gettr**
@ -580,6 +582,7 @@
- **HungamaAlbumPlaylist** - **HungamaAlbumPlaylist**
- **HungamaSong** - **HungamaSong**
- **huya:live**: huya.com - **huya:live**: huya.com
- **huya:video**: 虎牙视频
- **Hypem** - **Hypem**
- **Hytale** - **Hytale**
- **Icareus** - **Icareus**
@ -655,10 +658,12 @@
- **Ketnet** - **Ketnet**
- **khanacademy** - **khanacademy**
- **khanacademy:unit** - **khanacademy:unit**
- **Kick** - **kick:clips**
- **kick:live**
- **kick:vod**
- **Kicker** - **Kicker**
- **KickStarter** - **KickStarter**
- **KickVOD** - **Kika**: KiKA.de
- **kinja:embed** - **kinja:embed**
- **KinoPoisk** - **KinoPoisk**
- **Kommunetv** - **Kommunetv**
@ -690,6 +695,7 @@
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
- **Le**: 乐视网 - **Le**: 乐视网
- **LearningOnScreen**
- **Lecture2Go**: (**Currently broken**) - **Lecture2Go**: (**Currently broken**)
- **Lecturio**: [*lecturio*](## "netrc machine") - **Lecturio**: [*lecturio*](## "netrc machine")
- **LecturioCourse**: [*lecturio*](## "netrc machine") - **LecturioCourse**: [*lecturio*](## "netrc machine")
@ -720,7 +726,6 @@
- **livestream:original** - **livestream:original**
- **Livestreamfails** - **Livestreamfails**
- **Lnk** - **Lnk**
- **LnkGo**
- **loc**: Library of Congress - **loc**: Library of Congress
- **loom** - **loom**
- **loom:folder** - **loom:folder**
@ -754,7 +759,7 @@
- **Masters** - **Masters**
- **MatchTV** - **MatchTV**
- **MBN**: mbn.co.kr (매일방송) - **MBN**: mbn.co.kr (매일방송)
- **MDR**: MDR.DE and KiKA - **MDR**: MDR.DE
- **MedalTV** - **MedalTV**
- **media.ccc.de** - **media.ccc.de**
- **media.ccc.de:lists** - **media.ccc.de:lists**
@ -809,6 +814,7 @@
- **MNetTVLive**: [*mnettv*](## "netrc machine") - **MNetTVLive**: [*mnettv*](## "netrc machine")
- **MNetTVRecordings**: [*mnettv*](## "netrc machine") - **MNetTVRecordings**: [*mnettv*](## "netrc machine")
- **MochaVideo** - **MochaVideo**
- **Mojevideo**: mojevideo.sk
- **Mojvideo** - **Mojvideo**
- **Monstercat** - **Monstercat**
- **MonsterSirenHypergryphMusic** - **MonsterSirenHypergryphMusic**
@ -1140,7 +1146,6 @@
- **QuantumTV**: [*quantumtv*](## "netrc machine") - **QuantumTV**: [*quantumtv*](## "netrc machine")
- **QuantumTVLive**: [*quantumtv*](## "netrc machine") - **QuantumTVLive**: [*quantumtv*](## "netrc machine")
- **QuantumTVRecordings**: [*quantumtv*](## "netrc machine") - **QuantumTVRecordings**: [*quantumtv*](## "netrc machine")
- **Qub**
- **R7**: (**Currently broken**) - **R7**: (**Currently broken**)
- **R7Article**: (**Currently broken**) - **R7Article**: (**Currently broken**)
- **Radiko** - **Radiko**
@ -1284,12 +1289,14 @@
- **Screencast** - **Screencast**
- **Screencastify** - **Screencastify**
- **ScreencastOMatic** - **ScreencastOMatic**
- **ScreenRec**
- **ScrippsNetworks** - **ScrippsNetworks**
- **scrippsnetworks:watch** - **scrippsnetworks:watch**
- **Scrolller** - **Scrolller**
- **SCTE**: [*scte*](## "netrc machine") (**Currently broken**) - **SCTE**: [*scte*](## "netrc machine") (**Currently broken**)
- **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**) - **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**)
- **sejm** - **sejm**
- **Sen**
- **SenalColombiaLive**: (**Currently broken**) - **SenalColombiaLive**: (**Currently broken**)
- **SenateGov** - **SenateGov**
- **SenateISVP** - **SenateISVP**
@ -1326,6 +1333,7 @@
- **SlidesLive** - **SlidesLive**
- **Slutload** - **Slutload**
- **Smotrim** - **Smotrim**
- **SnapchatSpotlight**
- **Snotr** - **Snotr**
- **Sohu** - **Sohu**
- **SohuV** - **SohuV**
@ -1517,9 +1525,9 @@
- **tv5unis** - **tv5unis**
- **tv5unis:video** - **tv5unis:video**
- **tv8.it** - **tv8.it**
- **TVA**
- **TVANouvelles** - **TVANouvelles**
- **TVANouvellesArticle** - **TVANouvellesArticle**
- **tvaplus**: TVA+
- **TVC** - **TVC**
- **TVCArticle** - **TVCArticle**
- **TVer** - **TVer**
@ -1607,6 +1615,7 @@
- **videomore:season** - **videomore:season**
- **videomore:video** - **videomore:video**
- **VideoPress** - **VideoPress**
- **Vidflex**
- **Vidio**: [*vidio*](## "netrc machine") - **Vidio**: [*vidio*](## "netrc machine")
- **VidioLive**: [*vidio*](## "netrc machine") - **VidioLive**: [*vidio*](## "netrc machine")
- **VidioPremier**: [*vidio*](## "netrc machine") - **VidioPremier**: [*vidio*](## "netrc machine")
@ -1735,7 +1744,7 @@
- **XiaoHongShu**: 小红书 - **XiaoHongShu**: 小红书
- **ximalaya**: 喜马拉雅FM - **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑 - **ximalaya:album**: 喜马拉雅FM 专辑
- **xinpianchang**: xinpianchang.com (**Currently broken**) - **Xinpianchang**: 新片场
- **XMinus**: (**Currently broken**) - **XMinus**: (**Currently broken**)
- **XNXX** - **XNXX**
- **Xstream** - **Xstream**

View file

@ -236,6 +236,35 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0] downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
def test_format_selection_by_vcodec_sort(self):
formats = [
{'format_id': 'av1-format', 'ext': 'mp4', 'vcodec': 'av1', 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'vp9-hdr-format', 'ext': 'mp4', 'vcodec': 'vp09.02.50.10.01.09.18.09.00', 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'vp9-sdr-format', 'ext': 'mp4', 'vcodec': 'vp09.00.50.08', 'acodec': 'none', 'url': TEST_URL},
{'format_id': 'h265-format', 'ext': 'mp4', 'vcodec': 'h265', 'acodec': 'none', 'url': TEST_URL},
]
info_dict = _make_result(formats)
ydl = YDL({'format': 'bestvideo', 'format_sort': ['vcodec:vp9.2']})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vp9-hdr-format')
ydl = YDL({'format': 'bestvideo', 'format_sort': ['vcodec:vp9']})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vp9-sdr-format')
ydl = YDL({'format': 'bestvideo', 'format_sort': ['+vcodec:vp9.2']})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vp9-hdr-format')
ydl = YDL({'format': 'bestvideo', 'format_sort': ['+vcodec:vp9']})
ydl.process_ie_result(info_dict.copy())
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vp9-sdr-format')
def test_format_selection_string_ops(self): def test_format_selection_string_ops(self):
formats = [ formats = [
{'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},

View file

@ -403,6 +403,34 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, [''], args=['', '-']) self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', '']) self._test(jsi, [], args=['', ''])
def test_slice(self):
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
self._test('function f(){return "012345678".slice()}', '012345678')
self._test('function f(){return "012345678".slice(0)}', '012345678')
self._test('function f(){return "012345678".slice(5)}', '5678')
self._test('function f(){return "012345678".slice(99)}', '')
self._test('function f(){return "012345678".slice(-2)}', '78')
self._test('function f(){return "012345678".slice(-99)}', '012345678')
self._test('function f(){return "012345678".slice(0, 0)}', '')
self._test('function f(){return "012345678".slice(1, 0)}', '')
self._test('function f(){return "012345678".slice(0, 1)}', '0')
self._test('function f(){return "012345678".slice(3, 6)}', '345')
self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
self._test('function f(){return "012345678".slice(-1, 1)}', '')
self._test('function f(){return "012345678".slice(-3, -1)}', '67')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -822,6 +822,24 @@ class TestRequestHandlerMisc:
rh.close() rh.close()
assert len(logging_handlers) == before_count assert len(logging_handlers) == before_count
def test_wrap_request_errors(self):
class TestRequestHandler(RequestHandler):
def _validate(self, request):
if request.headers.get('x-fail'):
raise UnsupportedRequest('test error')
def _send(self, request: Request):
raise RequestError('test error')
with TestRequestHandler(logger=FakeLogger()) as rh:
with pytest.raises(UnsupportedRequest, match='test error') as exc_info:
rh.validate(Request('http://example.com', headers={'x-fail': '1'}))
assert exc_info.value.handler is rh
with pytest.raises(RequestError, match='test error') as exc_info:
rh.send(Request('http://example.com'))
assert exc_info.value.handler is rh
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True) @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
class TestUrllibRequestHandler(TestRequestHandlerBase): class TestUrllibRequestHandler(TestRequestHandlerBase):

View file

@ -444,6 +444,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540) self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140) self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363) self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
self.assertEqual(unified_timestamp('Sunday, 26 Nov 2006, 19:00'), 1164567600)
self.assertEqual(unified_timestamp('wed, aug 16, 2008, 12:00pm'), 1218931200)
self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1) self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86) self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
@ -919,6 +921,11 @@ class TestUtil(unittest.TestCase):
'acodec': 'none', 'acodec': 'none',
'dynamic_range': 'HDR10', 'dynamic_range': 'HDR10',
}) })
self.assertEqual(parse_codecs('vp09.02.50.10.01.09.18.09.00'), {
'vcodec': 'vp09.02.50.10.01.09.18.09.00',
'acodec': 'none',
'dynamic_range': 'HDR10',
})
self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), {
'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0',
'acodec': 'none', 'acodec': 'none',

View file

@ -88,7 +88,7 @@ def create_wss_websocket_server():
certfn = os.path.join(TEST_DIR, 'testcert.pem') certfn = os.path.join(TEST_DIR, 'testcert.pem')
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.load_cert_chain(certfn, None) sslctx.load_cert_chain(certfn, None)
return create_websocket_server(ssl_context=sslctx) return create_websocket_server(ssl=sslctx)
MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate') MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate')
@ -103,7 +103,7 @@ def create_mtls_wss_websocket_server():
sslctx.load_verify_locations(cafile=cacertfn) sslctx.load_verify_locations(cafile=cacertfn)
sslctx.load_cert_chain(certfn, None) sslctx.load_cert_chain(certfn, None)
return create_websocket_server(ssl_context=sslctx) return create_websocket_server(ssl=sslctx)
def create_legacy_wss_websocket_server(): def create_legacy_wss_websocket_server():
@ -112,7 +112,7 @@ def create_legacy_wss_websocket_server():
sslctx.maximum_version = ssl.TLSVersion.TLSv1_2 sslctx.maximum_version = ssl.TLSVersion.TLSv1_2
sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL') sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL')
sslctx.load_cert_chain(certfn, None) sslctx.load_cert_chain(certfn, None)
return create_websocket_server(ssl_context=sslctx) return create_websocket_server(ssl=sslctx)
def ws_validate_and_send(rh, req): def ws_validate_and_send(rh, req):
@ -139,7 +139,7 @@ class TestWebsSocketRequestHandlerConformance:
cls.wss_thread, cls.wss_port = create_wss_websocket_server() cls.wss_thread, cls.wss_port = create_wss_websocket_server()
cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}' cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}'
cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)) cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER))
cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}' cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}'
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server() cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()

View file

@ -171,6 +171,18 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
), ),
(
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
] ]

View file

@ -235,6 +235,11 @@ def validate_options(opts):
validate_regex('format sorting', f, FormatSorter.regex) validate_regex('format sorting', f, FormatSorter.regex)
# Postprocessor formats # Postprocessor formats
if opts.convertsubtitles == 'none':
opts.convertsubtitles = None
if opts.convertthumbnails == 'none':
opts.convertthumbnails = None
validate_regex('merge output format', opts.merge_output_format, validate_regex('merge output format', opts.merge_output_format,
r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)

View file

@ -1053,8 +1053,9 @@ def _decrypt_windows_dpapi(ciphertext, logger):
ctypes.byref(blob_out), # pDataOut ctypes.byref(blob_out), # pDataOut
) )
if not ret: if not ret:
logger.warning('failed to decrypt with DPAPI', only_once=True) message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
return None logger.error(message)
raise DownloadError(message) # force exit
result = ctypes.string_at(blob_out.pbData, blob_out.cbData) result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
ctypes.windll.kernel32.LocalFree(blob_out.pbData) ctypes.windll.kernel32.LocalFree(blob_out.pbData)

View file

@ -508,7 +508,7 @@ class FFmpegFD(ExternalFD):
env = None env = None
proxy = self.params.get('proxy') proxy = self.params.get('proxy')
if proxy: if proxy:
if not re.match(r'^[\da-zA-Z]+://', proxy): if not re.match(r'[\da-zA-Z]+://', proxy):
proxy = f'http://{proxy}' proxy = f'http://{proxy}'
if proxy.startswith('socks'): if proxy.startswith('socks'):
@ -559,7 +559,7 @@ class FFmpegFD(ExternalFD):
selected_formats = info_dict.get('requested_formats') or [info_dict] selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats): for i, fmt in enumerate(selected_formats):
is_http = re.match(r'^https?://', fmt['url']) is_http = re.match(r'https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies: if cookies:
args.extend(['-cookies', ''.join( args.extend(['-cookies', ''.join(

View file

@ -217,6 +217,7 @@ from .bbc import (
BBCCoUkIPlayerGroupIE, BBCCoUkIPlayerGroupIE,
BBCCoUkPlaylistIE, BBCCoUkPlaylistIE,
) )
from .beacon import BeaconTvIE
from .beatbump import ( from .beatbump import (
BeatBumpPlaylistIE, BeatBumpPlaylistIE,
BeatBumpVideoIE, BeatBumpVideoIE,
@ -729,6 +730,7 @@ from .genius import (
GeniusIE, GeniusIE,
GeniusLyricsIE, GeniusLyricsIE,
) )
from .germanupa import GermanupaIE
from .getcourseru import ( from .getcourseru import (
GetCourseRuIE, GetCourseRuIE,
GetCourseRuPlayerIE, GetCourseRuPlayerIE,
@ -822,7 +824,10 @@ from .hungama import (
HungamaIE, HungamaIE,
HungamaSongIE, HungamaSongIE,
) )
from .huya import HuyaLiveIE from .huya import (
HuyaLiveIE,
HuyaVideoIE,
)
from .hypem import HypemIE from .hypem import HypemIE
from .hypergryph import MonsterSirenHypergryphMusicIE from .hypergryph import MonsterSirenHypergryphMusicIE
from .hytale import HytaleIE from .hytale import HytaleIE
@ -939,11 +944,13 @@ from .khanacademy import (
KhanAcademyUnitIE, KhanAcademyUnitIE,
) )
from .kick import ( from .kick import (
KickClipIE,
KickIE, KickIE,
KickVODIE, KickVODIE,
) )
from .kicker import KickerIE from .kicker import KickerIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kika import KikaIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE from .kommunetv import KommunetvIE
@ -986,6 +993,7 @@ from .lcp import (
LcpIE, LcpIE,
LcpPlayIE, LcpPlayIE,
) )
from .learningonscreen import LearningOnScreenIE
from .lecture2go import Lecture2GoIE from .lecture2go import Lecture2GoIE
from .lecturio import ( from .lecturio import (
LecturioCourseIE, LecturioCourseIE,
@ -1034,10 +1042,7 @@ from .livestream import (
LivestreamShortenerIE, LivestreamShortenerIE,
) )
from .livestreamfails import LivestreamfailsIE from .livestreamfails import LivestreamfailsIE
from .lnkgo import ( from .lnk import LnkIE
LnkGoIE,
LnkIE,
)
from .loom import ( from .loom import (
LoomFolderIE, LoomFolderIE,
LoomIE, LoomIE,
@ -1162,6 +1167,7 @@ from .mlb import (
) )
from .mlssoccer import MLSSoccerIE from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE from .mocha import MochaVideoIE
from .mojevideo import MojevideoIE
from .mojvideo import MojvideoIE from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE from .monstercat import MonstercatIE
from .motherless import ( from .motherless import (
@ -1808,6 +1814,7 @@ from .screen9 import Screen9IE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastify import ScreencastifyIE from .screencastify import ScreencastifyIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .screenrec import ScreenRecIE
from .scrippsnetworks import ( from .scrippsnetworks import (
ScrippsNetworksIE, ScrippsNetworksIE,
ScrippsNetworksWatchIE, ScrippsNetworksWatchIE,
@ -1818,6 +1825,7 @@ from .scte import (
SCTECourseIE, SCTECourseIE,
) )
from .sejmpl import SejmIE from .sejmpl import SejmIE
from .sen import SenIE
from .senalcolombia import SenalColombiaLiveIE from .senalcolombia import SenalColombiaLiveIE
from .senategov import ( from .senategov import (
SenateGovIE, SenateGovIE,
@ -1873,6 +1881,7 @@ from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE from .slutload import SlutloadIE
from .smotrim import SmotrimIE from .smotrim import SmotrimIE
from .snapchat import SnapchatSpotlightIE
from .snotr import SnotrIE from .snotr import SnotrIE
from .sohu import ( from .sohu import (
SohuIE, SohuIE,
@ -2169,10 +2178,7 @@ from .tv5unis import (
TV5UnisVideoIE, TV5UnisVideoIE,
) )
from .tv24ua import TV24UAVideoIE from .tv24ua import TV24UAVideoIE
from .tva import ( from .tva import TVAIE
TVAIE,
QubIE,
)
from .tvanouvelles import ( from .tvanouvelles import (
TVANouvellesArticleIE, TVANouvellesArticleIE,
TVANouvellesIE, TVANouvellesIE,
@ -2312,6 +2318,7 @@ from .videomore import (
VideomoreVideoIE, VideomoreVideoIE,
) )
from .videopress import VideoPressIE from .videopress import VideoPressIE
from .vidflex import VidflexIE
from .vidio import ( from .vidio import (
VidioIE, VidioIE,
VidioLiveIE, VidioLiveIE,

View file

@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
}, },
'playlist_count': 15, 'playlist_count': 15,
'skip': 'This program is not currently available in ABC iview',
}, {
'url': 'https://iview.abc.net.au/show/inbestigators',
'info_dict': {
'id': '175343-1',
'title': 'Series 1',
'description': 'md5:b9976935a6450e5b78ce2a940a755685',
'series': 'The Inbestigators',
'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg',
},
'playlist_count': 17,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id) webpage = self._download_webpage(url, show_id)
webpage_data = self._search_regex( video_data = self._search_json(
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id,
webpage, 'initial state') transform_source=lambda x: x.encode().decode('unicode_escape'),
video_data = self._parse_json( end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded']
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
video_data = video_data['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'): if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'):

View file

@ -9,12 +9,12 @@ import re
import struct import struct
import time import time
import urllib.parse import urllib.parse
import urllib.request
import urllib.response
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_ecb_decrypt from ..aes import aes_ecb_decrypt
from ..networking import RequestHandler, Response
from ..networking.exceptions import TransportError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
@ -26,37 +26,36 @@ from ..utils import (
traverse_obj, traverse_obj,
update_url_query, update_url_query,
) )
from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking class AbemaLicenseRH(RequestHandler):
"""Add a handler for opening URLs, like _download_webpage""" _SUPPORTED_URL_SCHEMES = ('abematv-license',)
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 _SUPPORTED_PROXY_SCHEMES = None
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 _SUPPORTED_FEATURES = None
rh = ydl._request_director.handlers['Urllib'] RH_NAME = 'abematv_license'
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
return
headers = ydl.params['http_headers'].copy()
proxies = ydl.proxies.copy()
clean_proxies(proxies, headers)
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
assert isinstance(opener, urllib.request.OpenerDirector)
opener.add_handler(handler)
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
class AbemaLicenseHandler(urllib.request.BaseHandler): def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
handler_order = 499 super().__init__(**kwargs)
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
def __init__(self, ie: 'AbemaTVIE'):
# the protocol that this should really handle is 'abematv-license://'
# abematv_license_open is just a placeholder for development purposes
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
self.ie = ie self.ie = ie
def _send(self, request):
url = request.url
ticket = urllib.parse.urlparse(url).netloc
try:
response_data = self._get_videokey_from_ticket(ticket)
except ExtractorError as e:
raise TransportError(cause=e.cause) from e
except (IndexError, KeyError, TypeError) as e:
raise TransportError(cause=repr(e)) from e
return Response(
io.BytesIO(response_data), url,
headers={'Content-Length': str(len(response_data))})
def _get_videokey_from_ticket(self, ticket): def _get_videokey_from_ticket(self, ticket):
to_show = self.ie.get_param('verbose', False) to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show) media_token = self.ie._get_media_token(to_show=to_show)
@ -72,25 +71,17 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}) })
res = decode_base_n(license_response['k'], table=self.STRTABLE) res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new( h = hmac.new(
binascii.unhexlify(self.HKEY), binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode(), (license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256) digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest()) enckey = bytes_to_intlist(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url):
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': str(len(response_data)),
}, url=url, code=200)
class AbemaTVBaseIE(InfoExtractor): class AbemaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'abematv' _NETRC_MACHINE = 'abematv'
@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
if self._USERTOKEN: if self._USERTOKEN:
return self._USERTOKEN return self._USERTOKEN
add_opener(self._downloader, AbemaLicenseHandler(self)) self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
username, _ = self._get_login_info() username, _ = self._get_login_info()
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19') auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
@ -386,8 +377,7 @@ class AbemaTVIE(AbemaTVBaseIE):
f'https://api.abema.io/v1/video/programs/{video_id}', video_id, f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
note='Checking playability', note='Checking playability',
headers=headers) headers=headers)
ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType')) if not traverse_obj(api_response, ('label', 'free', {bool})):
if 3 not in ondemand_types:
# cannot acquire decryption key for these streams # cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream') self.report_warning('This is a premium-only stream')
availability = 'premium_only' availability = 'premium_only'

View file

@ -4,7 +4,7 @@ from .common import InfoExtractor
class AcademicEarthCourseIE(InfoExtractor): class AcademicEarthCourseIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
IE_NAME = 'AcademicEarth:Course' IE_NAME = 'AcademicEarth:Course'
_TEST = { _TEST = {
'url': 'http://academicearth.org/playlists/laws-of-nature/', 'url': 'http://academicearth.org/playlists/laws-of-nature/',

View file

@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
class ADNIE(ADNBaseIE): class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir', 'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261', 'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': { 'info_dict': {
'id': '9841', 'id': '9841',
@ -71,10 +71,7 @@ class ADNIE(ADNBaseIE):
}, },
'skip': 'Only available in French and German speaking Europe', 'skip': 'Only available in French and German speaking Europe',
}, { }, {
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', 'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1',
'only_matching': True,
}, {
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8', 'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': { 'info_dict': {
'id': '23550', 'id': '23550',
@ -167,7 +164,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
'username': username, 'username': username,
})) or {}).get('accessToken') })) or {}).get('accessToken')
if access_token: if access_token:
self._HEADERS = {'authorization': 'Bearer ' + access_token} self._HEADERS['Authorization'] = f'Bearer {access_token}'
except ExtractorError as e: except ExtractorError as e:
message = None message = None
if isinstance(e.cause, HTTPError) and e.cause.status == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
@ -178,6 +175,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
def _real_extract(self, url): def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id') lang, video_id = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/' video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
player = self._download_json( player = self._download_json(
video_base_url + 'configuration', video_id, video_base_url + 'configuration', video_id,
@ -218,7 +216,6 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_data = self._download_json( links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={ links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization, 'X-Player-Token': authorization,
'X-Target-Distribution': lang or 'fr',
**self._HEADERS, **self._HEADERS,
}, query={ }, query={
'freeWithAds': 'true', 'freeWithAds': 'true',
@ -257,6 +254,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
load_balancer_data = self._download_json( load_balancer_data = self._download_json(
load_balancer_url, video_id, load_balancer_url, video_id,
f'Downloading {format_id} {quality} JSON metadata', f'Downloading {format_id} {quality} JSON metadata',
headers=self._HEADERS,
fatal=False) or {} fatal=False) or {}
m3u8_url = load_balancer_data.get('location') m3u8_url = load_balancer_data.get('location')
if not m3u8_url: if not m3u8_url:
@ -277,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
video = (self._download_json( video = (self._download_json(
self._API_BASE_URL + f'video/{video_id}', video_id, self._API_BASE_URL + f'video/{video_id}', video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {} 'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {}
show = video.get('show') or {} show = video.get('show') or {}
return { return {
@ -299,9 +297,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
class ADNSeasonIE(ADNBaseIE): class ADNSeasonIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])' _VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>\d+)[^/?#]*/?(?:$|[#?])'
_TESTS = [{ _TESTS = [{
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new', 'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new',
'playlist_count': 12, 'playlist_count': 12,
'info_dict': { 'info_dict': {
'id': '911', 'id': '911',
@ -312,16 +310,14 @@ class ADNSeasonIE(ADNBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
lang, video_show_slug = self._match_valid_url(url).group('lang', 'id') lang, video_show_slug = self._match_valid_url(url).group('lang', 'id')
self._HEADERS['X-Target-Distribution'] = lang or 'fr'
show = self._download_json( show = self._download_json(
f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug, f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug,
'Downloading show JSON metadata', headers=self._HEADERS)['show'] 'Downloading show JSON metadata', headers=self._HEADERS)['show']
show_id = str(show['id']) show_id = str(show['id'])
episodes = self._download_json( episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug, f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={ 'Downloading episode list', headers=self._HEADERS, query={
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'order': 'asc', 'order': 'asc',
'limit': '-1', 'limit': '-1',
}) })

View file

@ -1,27 +1,42 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
clean_podcast_url, clean_podcast_url,
get_element_by_class,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
try_get,
) )
from ..utils.traversal import traverse_obj
class ApplePodcastsIE(InfoExtractor): class ApplePodcastsIE(InfoExtractor):
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)' _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654',
'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172',
'info_dict': {
'id': '1000665010654',
'ext': 'mp3',
'title': 'Ferreck Dawn - To The Break of Dawn 117',
'episode': 'Ferreck Dawn - To The Break of Dawn 117',
'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc',
'upload_date': '20240812',
'timestamp': 1723449600,
'duration': 3596,
'series': 'Ferreck Dawn - To The Break of Dawn',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
},
}, {
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'md5': '41dc31cd650143e530d9423b6b5a344f', 'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052',
'info_dict': { 'info_dict': {
'id': '1000482637777', 'id': '1000482637777',
'ext': 'mp3', 'ext': 'mp3',
'title': '207 - Whitney Webb Returns', 'title': '207 - Whitney Webb Returns',
'episode': '207 - Whitney Webb Returns',
'episode_number': 207,
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6', 'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
'upload_date': '20200705', 'upload_date': '20200705',
'timestamp': 1593932400, 'timestamp': 1593932400,
'duration': 6454, 'duration': 5369,
'series': 'The Tim Dillon Show', 'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)', 'thumbnail': 're:.+[.](png|jpe?g|webp)',
}, },
@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id) webpage = self._download_webpage(url, episode_id)
episode_data = {} server_data = self._search_json(
ember_data = {} r'<script [^>]*\bid=["\']serialized-server-data["\'][^>]*>', webpage,
# new page type 2021-11 'server data', episode_id, contains_pattern=r'\[{(?s:.+)}\]')[0]['data']
amp_data = self._parse_json(self._search_regex( model_data = traverse_obj(server_data, (
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<', 'headerButtonItems', lambda _, v: v['$kind'] == 'bookmark' and v['modelType'] == 'EpisodeOffer',
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {} 'model', {dict}, any))
amp_data = try_get(amp_data,
lambda a: self._parse_json(
next(a[x] for x in iter(a) if episode_id in x),
episode_id),
dict) or {}
amp_data = amp_data.get('d') or []
episode_data = try_get(
amp_data,
lambda a: next(x for x in a
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
dict)
if not episode_data:
# try pre 2021-11 page type: TODO: consider deleting if no longer used
ember_data = self._parse_json(self._search_regex(
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
webpage, 'ember data'), episode_id) or {}
ember_data = ember_data.get(episode_id) or ember_data
episode_data = try_get(ember_data, lambda x: x['data'], dict)
episode = episode_data['attributes']
description = episode.get('description') or {}
series = None
for inc in (amp_data or ember_data.get('included') or []):
if inc.get('type') == 'media/podcast':
series = try_get(inc, lambda x: x['attributes']['name'])
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
return { return {
'id': episode_id, 'id': episode_id,
'title': episode.get('name'), **self._json_ld(
'url': clean_podcast_url(episode['assetUrl']), traverse_obj(server_data, ('seoData', 'schemaContent', {dict}))
'description': description.get('standard') or description.get('short'), or self._yield_json_ld(webpage, episode_id, fatal=False), episode_id, fatal=False),
'timestamp': parse_iso8601(episode.get('releaseDateTime')), **traverse_obj(model_data, {
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000), 'title': ('title', {str}),
'series': series, 'url': ('streamUrl', {clean_podcast_url}),
'timestamp': ('releaseDate', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
}),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'vcodec': 'none', 'vcodec': 'none',
} }

View file

@ -231,7 +231,7 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(InfoExtractor): class ARDBetaMediathekIE(InfoExtractor):
IE_NAME = 'ARDMediathek' IE_NAME = 'ARDMediathek'
_VALID_URL = r'''(?x)https:// _VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/]+/)? (?:[^/]+/)?
(?:player|live|video)/ (?:player|live|video)/
@ -470,7 +470,7 @@ class ARDBetaMediathekIE(InfoExtractor):
class ARDMediathekCollectionIE(InfoExtractor): class ARDMediathekCollectionIE(InfoExtractor):
_VALID_URL = r'''(?x)https:// _VALID_URL = r'''(?x)https?://
(?:(?:beta|www)\.)?ardmediathek\.de/ (?:(?:beta|www)\.)?ardmediathek\.de/
(?:[^/?#]+/)? (?:[^/?#]+/)?
(?P<playlist>sendung|serie|sammlung)/ (?P<playlist>sendung|serie|sammlung)/

View file

@ -101,9 +101,10 @@ class AsobiStageIE(InfoExtractor):
self._HEADERS['Authorization'] = f'Bearer {token}' self._HEADERS['Authorization'] = f'Bearer {token}'
def _real_extract(self, url): def _real_extract(self, url):
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug') webpage, urlh = self._download_webpage_handle(url, self._match_id(url))
video_id, event, type_, slug = self._match_valid_url(urlh.url).group('id', 'event', 'type', 'slug')
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_] video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
webpage = self._download_webpage(url, video_id)
event_data = traverse_obj( event_data = traverse_obj(
self._search_nextjs_data(webpage, video_id, default={}), self._search_nextjs_data(webpage, video_id, default={}),
('props', 'pageProps', 'eventCMSData', { ('props', 'pageProps', 'eventCMSData', {

View file

@ -1,3 +1,5 @@
import functools
import json
import random import random
import re import re
import time import time
@ -6,7 +8,9 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
ExtractorError, ExtractorError,
extract_attributes,
float_or_none, float_or_none,
get_element_html_by_id,
int_or_none, int_or_none,
parse_filesize, parse_filesize,
str_or_none, str_or_none,
@ -17,6 +21,7 @@ from ..utils import (
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
@ -459,7 +464,7 @@ class BandcampUserIE(InfoExtractor):
}, },
}, { }, {
'url': 'https://coldworldofficial.bandcamp.com/music', 'url': 'https://coldworldofficial.bandcamp.com/music',
'playlist_mincount': 10, 'playlist_mincount': 7,
'info_dict': { 'info_dict': {
'id': 'coldworldofficial', 'id': 'coldworldofficial',
'title': 'Discography of coldworldofficial', 'title': 'Discography of coldworldofficial',
@ -473,12 +478,19 @@ class BandcampUserIE(InfoExtractor):
}, },
}] }]
def _yield_items(self, webpage):
    """Yield the item links found in a discography page.

    Two sources are combined, in this order:
    1. links scraped from the HTML with regular expressions, and
    2. the `page_url` of every entry in the JSON stored in the
       `data-client-items` attribute of the element with id `music-grid`.
    """
    # First pattern: hrefs inside `<li data-item-id=...>` entries, except those
    # rejected by the `/merch` negative lookahead
    list_item_links = re.findall(
        r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
    if list_item_links:
        yield from list_item_links
    else:
        # Only consulted when the first pattern matched nothing
        yield from re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)

    # Element HTML -> its attributes -> JSON in `data-client-items` -> each entry's `page_url`
    client_items_path = (
        {functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
        'data-client-items', {json.loads}, ..., 'page_url', {str})
    yield from traverse_obj(webpage, client_items_path)
def _real_extract(self, url): def _real_extract(self, url):
uploader = self._match_id(url) uploader = self._match_id(url)
webpage = self._download_webpage(url, uploader) webpage = self._download_webpage(url, uploader)
discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
return self.playlist_from_matches( return self.playlist_from_matches(
discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x)) self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url))

View file

@ -0,0 +1,68 @@
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
parse_iso8601,
traverse_obj,
)
class BeaconTvIE(InfoExtractor):
    # Extractor for content pages under beacon.tv/content/<slug>; the slug is used as the video id
    _VALID_URL = r'https?://(?:www\.)?beacon\.tv/content/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beacon.tv/content/welcome-to-beacon',
        'md5': 'b3f5932d437f288e662f10f3bfc5bd04',
        'info_dict': {
            'id': 'welcome-to-beacon',
            'ext': 'mp4',
            'upload_date': '20240509',
            'description': 'md5:ea2bd32e71acf3f9fca6937412cc3563',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/I4CkkEvN/poster.jpg?width=720',
            'title': 'Your home for Critical Role!',
            'timestamp': 1715227200,
            'duration': 105.494,
        },
    }, {
        'url': 'https://beacon.tv/content/re-slayers-take-trailer',
        'md5': 'd879b091485dbed2245094c8152afd89',
        'info_dict': {
            'id': 're-slayers-take-trailer',
            'ext': 'mp4',
            'title': 'The Re-Slayers Take | Official Trailer',
            'timestamp': 1715189040,
            'upload_date': '20240508',
            'duration': 53.249,
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/PW5ApIw3/poster.jpg?width=720',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single content page.

        Raises ExtractorError when no matching content entry is found, when the
        content is not a video/podcast, or when no player data can be decoded;
        calls `raise_login_required` when the content tier differs from the one
        hard-coded below.
        """
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        nextjs_data = self._search_nextjs_data(page, slug)

        # Pick the first `Content:*` entry of the Apollo state whose slug matches the URL
        content = traverse_obj(nextjs_data, (
            'props', 'pageProps', '__APOLLO_STATE__',
            lambda key, entry: key.startswith('Content:') and entry['slug'] == slug, any))
        if not content:
            raise ExtractorError('Failed to extract content data')

        # The player config is stored as a JSON string under either the video or
        # the podcast branch; take the first one that decodes to a dict
        video_path = ('contentVideo', 'video', 'videoData')
        podcast_path = ('contentPodcast', 'podcast', 'audioData')
        player_config = traverse_obj(
            content, ((video_path, podcast_path), {json.loads}, {dict}, any))

        if not player_config:
            # Work out why nothing playable was found, from most to least specific
            content_type = content.get('contentType')
            if content_type not in ('videoPodcast', 'video', 'podcast'):
                raise ExtractorError('Content is not a video/podcast', expected=True)
            tier_ref = traverse_obj(content, ('contentTier', '__ref'))
            # NOTE(review): presumably this ref identifies the tier that needs no membership - confirm
            if tier_ref != 'MemberTier:65b258d178f89be87b4dc0a4':
                self.raise_login_required('This video/podcast is for members only')
            raise ExtractorError('Failed to extract content')

        # Page metadata takes precedence over whatever the player config provided
        info = dict(self._parse_jwplayer_data(player_config, slug))
        info.update(traverse_obj(content, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('publishedAt', {parse_iso8601}),
        }))
        return info

View file

@ -46,6 +46,7 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor): class BilibiliBaseIE(InfoExtractor):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {} _wbi_key_cache = {}
@ -192,7 +193,7 @@ class BilibiliBaseIE(InfoExtractor):
video_info = self._download_json( video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id, 'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}') note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
if traverse_obj(video_info, ('data', 'need_login_subtitle')): if traverse_obj(video_info, ('data', 'need_login_subtitle')):
self.report_warning( self.report_warning(
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True) f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
@ -207,7 +208,7 @@ class BilibiliBaseIE(InfoExtractor):
def _get_chapters(self, aid, cid): def _get_chapters(self, aid, cid):
chapters = aid and cid and self._download_json( chapters = aid and cid and self._download_json(
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
note='Extracting chapters', fatal=False) note='Extracting chapters', fatal=False, headers=self._HEADERS)
return traverse_obj(chapters, ('data', 'view_points', ..., { return traverse_obj(chapters, ('data', 'view_points', ..., {
'title': 'content', 'title': 'content',
'start_time': 'from', 'start_time': 'from',
@ -298,7 +299,7 @@ class BilibiliBaseIE(InfoExtractor):
class BiliBiliIE(BilibiliBaseIE): class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bilibili.com/video/BV13x41117TL', 'url': 'https://www.bilibili.com/video/BV13x41117TL',
@ -622,6 +623,10 @@ class BiliBiliIE(BilibiliBaseIE):
'ext': 'mp4', 'ext': 'mp4',
}, },
'skip': 'geo-restricted', 'skip': 'geo-restricted',
}, {
'note': 'has - in the last path segment of the url',
'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -1017,8 +1022,6 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
class BilibiliCheeseBaseIE(BilibiliBaseIE): class BilibiliCheeseBaseIE(BilibiliBaseIE):
_HEADERS = {'Referer': 'https://www.bilibili.com/'}
def _extract_episode(self, season_info, ep_id): def _extract_episode(self, season_info, ep_id):
episode_info = traverse_obj(season_info, ( episode_info = traverse_obj(season_info, (
'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False) 'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
@ -1848,7 +1851,7 @@ class BiliBiliPlayerIE(InfoExtractor):
class BiliIntlBaseIE(InfoExtractor): class BiliIntlBaseIE(InfoExtractor):
_API_URL = 'https://api.bilibili.tv/intl/gateway' _API_URL = 'https://api.bilibili.tv/intl/gateway'
_NETRC_MACHINE = 'biliintl' _NETRC_MACHINE = 'biliintl'
_HEADERS = {'Referer': 'https://www.bilibili.com/'} _HEADERS = {'Referer': 'https://www.bilibili.tv/'}
def _call_api(self, endpoint, *args, **kwargs): def _call_api(self, endpoint, *args, **kwargs):
json = self._download_json(self._API_URL + endpoint, *args, **kwargs) json = self._download_json(self._API_URL + endpoint, *args, **kwargs)

View file

@ -3,7 +3,7 @@ from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj
class CallinIE(InfoExtractor): class CallinIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?callin\.com/episode/(?P<id>[-a-zA-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc', 'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
'info_dict': { 'info_dict': {

View file

@ -1,4 +1,5 @@
import base64 import base64
import functools
import json import json
import re import re
import time import time
@ -6,17 +7,24 @@ import urllib.parse
import xml.etree.ElementTree import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
js_to_json, js_to_json,
mimetype2ext,
orderedSet, orderedSet,
parse_iso8601, parse_iso8601,
replace_extension,
smuggle_url, smuggle_url,
strip_or_none, strip_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
update_url,
url_basename,
url_or_none,
) )
@ -149,6 +157,7 @@ class CBCIE(InfoExtractor):
class CBCPlayerIE(InfoExtractor): class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player' IE_NAME = 'cbc.ca:player'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)' _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
_GEO_COUNTRIES = ['CA']
_TESTS = [{ _TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193', 'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c', 'md5': '64d25f841ddf4ddb28a235338af32e2c',
@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor):
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9', 'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
'timestamp': 1425704400, 'timestamp': 1425704400,
'upload_date': '20150307', 'upload_date': '20150307',
'uploader': 'CBCC-NEW', 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [], 'chapters': [],
'duration': 494.811, 'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'], 'categories': ['All in a Weekend Montreal'],
'tags': 'count:8', 'tags': 'count:11',
'location': 'Quebec', 'location': 'Quebec',
'series': 'All in a Weekend Montreal', 'series': 'All in a Weekend Montreal',
'season': 'Season 2015', 'season': 'Season 2015',
'season_number': 2015, 'season_number': 2015,
'media_type': 'Excerpt', 'media_type': 'Excerpt',
'genres': ['Other'],
}, },
}, { }, {
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062', 'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': { 'info_dict': {
'id': '2164402062', 'id': '2164402062',
'ext': 'mp4', 'ext': 'mp4',
@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor):
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746, 'timestamp': 1320410746,
'upload_date': '20111104', 'upload_date': '20111104',
'uploader': 'CBCC-NEW', 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [], 'chapters': [],
'duration': 186.867, 'duration': 186.867,
'series': 'CBC News: Windsor at 6:00', 'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'], 'categories': ['Windsor'],
'location': 'Windsor', 'location': 'Windsor',
'tags': ['cancer'], 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
'creators': ['Allison Johnson'],
'media_type': 'Excerpt', 'media_type': 'Excerpt',
'genres': ['News'],
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
'url': 'https://www.cbc.ca/player/play/1.2985700', 'url': 'https://www.cbc.ca/player/play/1.2985700',
'md5': 'e5e708c34ae6fca156aafe17c43e8b75', 'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
'info_dict': { 'info_dict': {
'id': '2657631896', 'id': '1.2985700',
'ext': 'mp3', 'ext': 'mp3',
'title': 'CBC Montreal is organizing its first ever community hackathon!', 'title': 'CBC Montreal is organizing its first ever community hackathon!',
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
'timestamp': 1425704400, 'timestamp': 1425704400,
'upload_date': '20150307', 'upload_date': '20150307',
'uploader': 'CBCC-NEW', 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
'chapters': [], 'chapters': [],
'duration': 494.811, 'duration': 494.811,
'categories': ['AudioMobile/All in a Weekend Montreal'], 'categories': ['All in a Weekend Montreal'],
'tags': 'count:8', 'tags': 'count:11',
'location': 'Quebec', 'location': 'Quebec',
'series': 'All in a Weekend Montreal', 'series': 'All in a Weekend Montreal',
'season': 'Season 2015', 'season': 'Season 2015',
'season_number': 2015, 'season_number': 2015,
'media_type': 'Excerpt', 'media_type': 'Excerpt',
'genres': ['Other'],
}, },
}, { }, {
'url': 'https://www.cbc.ca/player/play/1.1711287', 'url': 'https://www.cbc.ca/player/play/1.1711287',
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': { 'info_dict': {
'id': '2164402062', 'id': '1.1711287',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Cancer survivor four times over', 'title': 'Cancer survivor four times over',
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746, 'timestamp': 1320410746,
'upload_date': '20111104', 'upload_date': '20111104',
'uploader': 'CBCC-NEW', 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
'chapters': [], 'chapters': [],
'duration': 186.867, 'duration': 186.867,
'series': 'CBC News: Windsor at 6:00', 'series': 'CBC News: Windsor at 6:00',
'categories': ['News/Canada/Windsor'], 'categories': ['Windsor'],
'location': 'Windsor', 'location': 'Windsor',
'tags': ['cancer'], 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
'creators': ['Allison Johnson'],
'media_type': 'Excerpt', 'media_type': 'Excerpt',
'genres': ['News'],
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
# Has subtitles # Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here: # These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
'url': 'https://www.cbc.ca/player/play/1.7159484', 'url': 'https://www.cbc.ca/player/play/video/9.6424403',
'md5': '6ed6cd0fc2ef568d2297ba68a763d455', 'md5': '8025909eaffcf0adf59922904def9a5e',
'info_dict': { 'info_dict': {
'id': '2324213316001', 'id': '9.6424403',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The National | School boards sue social media giants', 'title': 'The National | N.W.T. wildfire emergency',
'description': 'md5:4b4db69322fa32186c3ce426da07402c', 'description': 'md5:ada33d36d1df69347ed575905bfd496c',
'timestamp': 1711681200, 'timestamp': 1718589600,
'duration': 2743.400, 'duration': 2692.833,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, 'subtitles': {
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg', 'en-US': [{
'uploader': 'CBCC-NEW', 'name': 'English Captions',
'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
}],
},
'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
'chapters': 'count:5', 'chapters': 'count:5',
'upload_date': '20240329', 'upload_date': '20240617',
'categories': 'count:4', 'categories': ['News', 'The National', 'The National Latest Broadcasts'],
'series': 'The National - Full Show', 'series': 'The National - Full Show',
'tags': 'count:1', 'tags': ['The National'],
'creators': ['News'],
'location': 'Canada', 'location': 'Canada',
'media_type': 'Full Program', 'media_type': 'Full Program',
'genres': ['News'],
}, },
}, { }, {
'url': 'https://www.cbc.ca/player/play/video/1.7194274', 'url': 'https://www.cbc.ca/player/play/video/1.7194274',
'md5': '188b96cf6bdcb2540e178a6caa957128', 'md5': '188b96cf6bdcb2540e178a6caa957128',
'info_dict': { 'info_dict': {
'id': '2334524995812', 'id': '1.7194274',
'ext': 'mp4', 'ext': 'mp4',
'title': '#TheMoment a rare white spirit moose was spotted in Alberta', 'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3', 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
'timestamp': 1714788791, 'timestamp': 1714788791,
'duration': 77.678, 'duration': 77.678,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg', 'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
'uploader': 'CBCC-NEW', 'chapters': [],
'chapters': 'count:0',
'upload_date': '20240504',
'categories': 'count:3', 'categories': 'count:3',
'series': 'The National', 'series': 'The National',
'tags': 'count:15', 'tags': 'count:17',
'creators': ['encoder'],
'location': 'Canada', 'location': 'Canada',
'media_type': 'Excerpt', 'media_type': 'Excerpt',
'upload_date': '20240504',
'genres': ['News'],
},
}, {
'url': 'https://www.cbc.ca/player/play/video/9.6427282',
'info_dict': {
'id': '9.6427282',
'ext': 'mp4',
'title': 'Men\'s Soccer - Argentina vs Morocco',
'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
'series': 'CBC Sports',
'media_type': 'Event Coverage',
'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
'timestamp': 1721825400.0,
'upload_date': '20240724',
'duration': 10568.0,
'chapters': [],
'genres': [],
'tags': ['2024 Paris Olympic Games'],
'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
'location': 'Canada',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.cbc.ca/player/play/video/9.6459530',
'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
'info_dict': {
'id': '9.6459530',
'ext': 'mp4',
'title': 'Parts of Jasper incinerated as wildfire rages',
'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
'series': 'The National',
'media_type': 'Excerpt',
'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
'timestamp': 1721964091.012,
'upload_date': '20240726',
'duration': 952.285,
'chapters': [],
'genres': [],
'tags': 'count:23',
'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
},
}, {
'url': 'https://www.cbc.ca/player/play/video/9.6420651',
'md5': '71a850c2c6ee5e912de169f5311bb533',
'info_dict': {
'id': '9.6420651',
'ext': 'mp4',
'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
'series': 'CBC News Edmonton',
'media_type': 'Excerpt',
'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
'timestamp': 1718220065.768,
'upload_date': '20240612',
'duration': 286.086,
'chapters': [],
'genres': ['News'],
'categories': ['News', 'Edmonton'],
'tags': 'count:7',
'location': 'Edmonton',
}, },
}, { }, {
'url': 'cbcplayer:1.7159484', 'url': 'cbcplayer:1.7159484',
@ -307,23 +376,113 @@ class CBCPlayerIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _parse_param(self, asset_data, name):
    """Look up a named parameter in an asset's `params` collection.

    Selects the `params` entries whose `name` equals *name* and returns the
    first `value` among them that is a string.
    """
    # NOTE(review): the result when nothing matches is whatever `traverse_obj`
    # gives for an empty `any` - presumably None; confirm against its docs
    first_matching_value = (
        'params', lambda _, param: param['name'] == name, 'value', {str}, any)
    return traverse_obj(asset_data, first_matching_value)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
if '.' in video_id: webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id) data = self._search_json(
video_id = self._search_json( r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
r'window\.__INITIAL_STATE__\s*=', webpage, assets = traverse_obj(
'initial state', video_id)['video']['currentClip']['mediaId'] data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))
if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
# XXX: Deprecated; CBC is migrating off of ThePlatform
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
'force_smil_url': True,
}),
'id': media_id,
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
}
is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
formats, subtitles = [], {}
for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
subtitles.setdefault(sub.get('language') or 'und', []).append({
'url': sub['src'],
'name': sub.get('label'),
})
for asset in assets:
asset_key = asset['key']
asset_type = asset['type']
if asset_type != 'medianet':
self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
continue
asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
formats.extend(fmts)
# Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
if not subtitles:
self._merge_subtitles(subs, target=subtitles)
if is_live or not fmts:
continue
# Check for direct https mp4 format
best_video_fmt = traverse_obj(fmts, (
lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
{functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
base_url = self._search_regex(
r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
if not base_url or '/live/' in base_url:
continue
mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
if self._request_webpage(
HEADRequest(mp4_url), video_id, 'Checking for https format',
errnote=False, fatal=False):
formats.append({
**best_video_fmt,
'url': mp4_url,
'format_id': 'https-mp4',
'protocol': 'https',
'manifest_url': None,
'acodec': None,
})
else:
formats.append({
'url': asset_data['url'],
'ext': ext,
'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
})
chapters = traverse_obj(data, (
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
'title': ('name', {str}),
}))
# Filter out pointless single chapters with start_time==0 and no end_time
if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
chapters = []
return { return {
'_type': 'url_transparent', **traverse_obj(data, {
'ie_key': 'ThePlatform', 'title': ('title', {str}),
'url': smuggle_url( 'description': ('description', {str.strip}),
f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', { 'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
'force_smil_url': True, 'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
}), 'media_type': ('media', 'clipType', {str}),
'series': ('showName', {str}),
'season_number': ('media', 'season', {int_or_none}),
'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
'location': ('media', 'region', {str}),
'tags': ('tags', ..., 'name', {str}),
'genres': ('media', 'genre', all),
'categories': ('categories', ..., 'name', {str}),
}),
'id': video_id, 'id': video_id,
'_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS 'formats': formats,
'subtitles': subtitles,
'chapters': chapters,
'is_live': is_live,
} }
@ -647,11 +806,11 @@ class CBCGemLiveIE(InfoExtractor):
'title': 'Ottawa', 'title': 'Ottawa',
'description': 'The live TV channel and local programming from Ottawa', 'description': 'The live TV channel and local programming from Ottawa',
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
'is_live': True, 'live_status': 'is_live',
'id': 'AyqZwxRqh8EH', 'id': 'AyqZwxRqh8EH',
'ext': 'mp4', 'ext': 'mp4',
'timestamp': 1492106160, 'release_timestamp': 1492106160,
'upload_date': '20170413', 'release_date': '20170413',
'uploader': 'CBCC-NEW', 'uploader': 'CBCC-NEW',
}, },
'skip': 'Live might have ended', 'skip': 'Live might have ended',
@ -680,49 +839,84 @@ class CBCGemLiveIE(InfoExtractor):
'description': 'March 24, 2023 | President Bidens Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.', 'description': 'March 24, 2023 | President Bidens Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
'timestamp': 1679706000, 'release_timestamp': 1679706000,
'upload_date': '20230325', 'release_date': '20230325',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
'skip': 'Live might have ended', 'skip': 'Live might have ended',
}, },
{ # event replay (medianetlive)
'url': 'https://gem.cbc.ca/live-event/42314',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': {
'id': '42314',
'ext': 'mp4',
'live_status': 'was_live',
'title': 'Women\'s Soccer - Canada vs New Zealand',
'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
'thumbnail': r're:https://.+default\.jpg',
'release_timestamp': 1721917200,
'release_date': '20240725',
},
'params': {'skip_download': True},
'skip': 'Replay might no longer be available',
},
{ # event replay (medianetlive)
'url': 'https://gem.cbc.ca/live-event/43273',
'only_matching': True,
},
] ]
_GEO_COUNTRIES = ['CA']
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data'] video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
# Two types of metadata JSON # Three types of video_info JSON: info in root, freeTv stream/item, event replay
if not video_info.get('formattedIdMedia'): if not video_info.get('formattedIdMedia'):
video_info = traverse_obj( if traverse_obj(video_info, ('event', 'key')) == video_id:
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}), video_info = video_info['event']
get_all=False, default={}) else:
video_info = traverse_obj(video_info, (
('freeTv', ('streams', ...)), 'items',
lambda _, v: v['key'].partition('-')[0] == video_id, any)) or {}
video_stream_id = video_info.get('formattedIdMedia') video_stream_id = video_info.get('formattedIdMedia')
if not video_stream_id: if not video_stream_id:
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True) raise ExtractorError(
'Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
stream_data = self._download_json( live_status = 'was_live' if video_info.get('isVodEnabled') else 'is_live'
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={ release_timestamp = traverse_obj(video_info, ('airDate', {parse_iso8601}))
'appCode': 'mpx',
'connectionType': 'hd', if live_status == 'is_live' and release_timestamp and release_timestamp > time.time():
'deviceType': 'ipad', formats = []
'idMedia': video_stream_id, live_status = 'is_upcoming'
'multibitrate': 'true', self.raise_no_formats('This livestream has not yet started', expected=True)
'output': 'json', else:
'tech': 'hls', stream_data = self._download_json(
'manifestType': 'desktop', 'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
}) 'appCode': 'medianetlive',
'connectionType': 'hd',
'deviceType': 'ipad',
'idMedia': video_stream_id,
'multibitrate': 'true',
'output': 'json',
'tech': 'hls',
'manifestType': 'desktop',
})
formats = self._extract_m3u8_formats(
stream_data['url'], video_id, 'mp4', live=live_status == 'is_live')
return { return {
'id': video_id, 'id': video_id,
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True), 'formats': formats,
'is_live': True, 'live_status': live_status,
'release_timestamp': release_timestamp,
**traverse_obj(video_info, { **traverse_obj(video_info, {
'title': 'title', 'title': ('title', {str}),
'description': 'description', 'description': ('description', {str}),
'thumbnail': ('images', 'card', 'url'), 'thumbnail': ('images', 'card', 'url'),
'timestamp': ('airDate', {parse_iso8601}),
}), }),
} }

View file

@ -35,6 +35,7 @@ from ..networking import HEADRequest, Request
from ..networking.exceptions import ( from ..networking.exceptions import (
HTTPError, HTTPError,
IncompleteRead, IncompleteRead,
TransportError,
network_exceptions, network_exceptions,
) )
from ..networking.impersonate import ImpersonateTarget from ..networking.impersonate import ImpersonateTarget
@ -965,6 +966,9 @@ class InfoExtractor:
return False return False
content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
encoding=encoding, data=data) encoding=encoding, data=data)
if content is False:
assert not fatal
return False
return (content, urlh) return (content, urlh)
@staticmethod @staticmethod
@ -1039,7 +1043,15 @@ class InfoExtractor:
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
prefix=None, encoding=None, data=None): prefix=None, encoding=None, data=None):
webpage_bytes = urlh.read() try:
webpage_bytes = urlh.read()
except TransportError as err:
errmsg = f'{video_id}: Error reading response: {err.msg}'
if fatal:
raise ExtractorError(errmsg, cause=err)
self.report_warning(errmsg)
return False
if prefix is not None: if prefix is not None:
webpage_bytes = prefix + webpage_bytes webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False): if self.get_param('dump_intermediate_pages', False):
@ -1698,7 +1710,7 @@ class InfoExtractor:
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none) rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
if rating is not None: if rating is not None:
info['average_rating'] = rating info['average_rating'] = rating
if is_type(e, 'TVEpisode', 'Episode'): if is_type(e, 'TVEpisode', 'Episode', 'PodcastEpisode'):
episode_name = unescapeHTML(e.get('name')) episode_name = unescapeHTML(e.get('name'))
info.update({ info.update({
'episode': episode_name, 'episode': episode_name,
@ -2065,7 +2077,7 @@ class InfoExtractor:
has_drm = HlsFD._has_drm(m3u8_doc) has_drm = HlsFD._has_drm(m3u8_doc)
def format_url(url): def format_url(url):
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) return url if re.match(r'https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False): if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None): def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@ -2800,11 +2812,11 @@ class InfoExtractor:
base_url_e = element.find(_add_ns('BaseURL')) base_url_e = element.find(_add_ns('BaseURL'))
if try_call(lambda: base_url_e.text) is not None: if try_call(lambda: base_url_e.text) is not None:
base_url = base_url_e.text + base_url base_url = base_url_e.text + base_url
if re.match(r'^https?://', base_url): if re.match(r'https?://', base_url):
break break
if mpd_base_url and base_url.startswith('/'): if mpd_base_url and base_url.startswith('/'):
base_url = urllib.parse.urljoin(mpd_base_url, base_url) base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url): elif mpd_base_url and not re.match(r'https?://', base_url):
if not mpd_base_url.endswith('/'): if not mpd_base_url.endswith('/'):
mpd_base_url += '/' mpd_base_url += '/'
base_url = mpd_base_url + base_url base_url = mpd_base_url + base_url
@ -2894,7 +2906,7 @@ class InfoExtractor:
} }
def location_key(location): def location_key(location):
return 'url' if re.match(r'^https?://', location) else 'path' return 'url' if re.match(r'https?://', location) else 'path'
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
@ -3150,7 +3162,7 @@ class InfoExtractor:
}) })
return formats, subtitles return formats, subtitles
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None): def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
def absolute_url(item_url): def absolute_url(item_url):
return urljoin(base_url, item_url) return urljoin(base_url, item_url)
@ -3174,11 +3186,11 @@ class InfoExtractor:
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4', full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
preference=preference, quality=quality, fatal=False) preference=preference, quality=quality, fatal=False, headers=_headers)
elif ext == 'mpd': elif ext == 'mpd':
is_plain_url = False is_plain_url = False
formats = self._extract_mpd_formats( formats = self._extract_mpd_formats(
full_url, video_id, mpd_id=mpd_id, fatal=False) full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
else: else:
is_plain_url = True is_plain_url = True
formats = [{ formats = [{
@ -3272,6 +3284,8 @@ class InfoExtractor:
}) })
for f in media_info['formats']: for f in media_info['formats']:
f.setdefault('http_headers', {})['Referer'] = base_url f.setdefault('http_headers', {})['Referer'] = base_url
if _headers:
f['http_headers'].update(_headers)
if media_info['formats'] or media_info['subtitles']: if media_info['formats'] or media_info['subtitles']:
entries.append(media_info) entries.append(media_info)
return entries return entries
@ -3487,7 +3501,7 @@ class InfoExtractor:
continue continue
urls.add(source_url) urls.add(source_url)
source_type = source.get('type') or '' source_type = source.get('type') or ''
ext = mimetype2ext(source_type) or determine_ext(source_url) ext = determine_ext(source_url, default_ext=mimetype2ext(source_type))
if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url:
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', entry_protocol='m3u8_native', source_url, video_id, 'mp4', entry_protocol='m3u8_native',

View file

@ -319,32 +319,6 @@ class DPlayIE(DPlayBaseIE):
url, display_id, host, 'dplay' + country, country, domain) url, display_id, host, 'dplay' + country, country, domain)
class HGTVDeIE(DPlayBaseIE):
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
'info_dict': {
'id': '151205',
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
'ext': 'mp4',
'title': 'Wer braucht schon eine Toilette',
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
'duration': 1177.024,
'timestamp': 1595705400,
'upload_date': '20200725',
'creator': 'HGTV',
'series': 'Tiny House - klein, aber oho',
'season_number': 3,
'episode_number': 3,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
return self._get_disco_api_info(
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
class DiscoveryPlusBaseIE(DPlayBaseIE): class DiscoveryPlusBaseIE(DPlayBaseIE):
"""Subclasses must set _PRODUCT, _DISCO_API_PARAMS""" """Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
@ -373,6 +347,45 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS) return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS)
class HGTVDeIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
'info_dict': {
'id': '7332936',
'ext': 'mp4',
'display_id': 'mein-kleinstadt-traumhaus/vom-landleben-ins-loft',
'title': 'Vom Landleben ins Loft',
'description': 'md5:e5f72c02c853970796dd3818f2e25745',
'episode': 'Episode 7',
'episode_number': 7,
'season': 'Season 7',
'season_number': 7,
'series': 'Mein Kleinstadt-Traumhaus',
'duration': 2645.0,
'timestamp': 1725998100,
'upload_date': '20240910',
'creators': ['HGTV'],
'tags': [],
'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/08/09/82a386b9-c688-32c7-b9ff-0b13865f0bae.jpeg',
},
}]
_PRODUCT = 'hgtv'
_DISCO_API_PARAMS = {
'disco_host': 'eu1-prod.disco-api.com',
'realm': 'hgtv',
'country': 'de',
}
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
'x-disco-params': f'realm={realm}',
'x-disco-client': 'Alps:HyogaPlayer:0.0.0',
'Authorization': self._get_auth(disco_base, display_id, realm),
})
class GoDiscoveryIE(DiscoveryPlusBaseIE): class GoDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{ _TESTS = [{
@ -934,7 +947,7 @@ class TLCIE(DiscoveryPlusBaseIE):
class DiscoveryPlusIE(DiscoveryPlusBaseIE): class DiscoveryPlusIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
_TESTS = [{ _TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': { 'info_dict': {
@ -958,6 +971,9 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
}, { }, {
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review', 'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
'only_matching': True,
}] }]
_PRODUCT = None _PRODUCT = None
@ -1144,13 +1160,19 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE): class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
_TESTS = [{ _TESTS = [{
'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi', 'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer', 'url': 'https://www.discoveryplus.com/it/video/super-benny/trailer',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/it/video/olympics/dplus-sport-dplus-sport-sport/water-polo-greece-italy',
'only_matching': True,
}, {
'url': 'https://www.discoveryplus.com/it/video/sport/dplus-sport-dplus-sport-sport/lisa-vittozzi-allinferno-e-ritorno',
'only_matching': True,
}] }]
_PRODUCT = 'dplus_it' _PRODUCT = 'dplus_it'

View file

@ -6,8 +6,10 @@ import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
update_url,
update_url_query, update_url_query,
url_basename, url_basename,
urlencode_postdata,
) )
@ -36,43 +38,58 @@ class DropboxIE(InfoExtractor):
}, },
] ]
def _yield_decoded_parts(self, webpage):
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
yield base64.b64decode(encoded).decode('utf-8', 'ignore')
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
fn = urllib.parse.unquote(url_basename(url)) fn = urllib.parse.unquote(url_basename(url))
title = os.path.splitext(fn)[0] title = os.path.splitext(fn)[0]
password = self.get_param('videopassword') password = self.get_param('videopassword')
if (self._og_search_title(webpage) == 'Dropbox - Password Required'
or 'Enter the password for this link' in webpage):
for part in self._yield_decoded_parts(webpage):
if '/sm/password' in part:
webpage = self._download_webpage(
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
break
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
or 'Enter the password for this link' in webpage):
if password: if password:
content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id')
payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}'
response = self._download_json( response = self._download_json(
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(), 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}) headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
data=urlencode_postdata({
'is_xhr': 'true',
't': self._get_cookies('https://www.dropbox.com')['t'].value,
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
'password': password,
'url': url,
}))
if response.get('status') != 'authed': if response.get('status') != 'authed':
raise ExtractorError('Authentication failed!', expected=True) raise ExtractorError('Invalid password', expected=True)
webpage = self._download_webpage(url, video_id) elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
elif self._get_cookies('https://dropbox.com').get('sm_auth'):
webpage = self._download_webpage(url, video_id)
else:
raise ExtractorError('Password protected video, use --video-password <password>', expected=True) raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
webpage = self._download_webpage(url, video_id)
formats, subtitles, has_anonymous_download = [], {}, False formats, subtitles = [], {}
for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)): has_anonymous_download = False
decoded = base64.b64decode(encoded).decode('utf-8', 'ignore') thumbnail = None
for part in self._yield_decoded_parts(webpage):
if not has_anonymous_download: if not has_anonymous_download:
has_anonymous_download = self._search_regex( has_anonymous_download = self._search_regex(
r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False) r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
transcode_url = self._search_regex( transcode_url = self._search_regex(
r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None) r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
if not transcode_url: if not transcode_url:
continue continue
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4') formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
thumbnail = self._search_regex(
r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
break break
# downloads enabled we can get the original file # downloads enabled we can get the original file
@ -89,4 +106,5 @@ class DropboxIE(InfoExtractor):
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnail': thumbnail,
} }

View file

@ -17,6 +17,7 @@ from ..utils import (
url_or_none, url_or_none,
variadic, variadic,
) )
from ..utils.traversal import traverse_obj
class ERTFlixBaseIE(InfoExtractor): class ERTFlixBaseIE(InfoExtractor):
@ -74,29 +75,28 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
def _extract_formats_and_subs(self, video_id): def _extract_formats_and_subs(self, video_id):
media_info = self._call_api(video_id, codename=video_id) media_info = self._call_api(video_id, codename=video_id)
formats, subs = [], {} formats, subtitles = [], {}
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []: for media in traverse_obj(media_info, (
for media in try_get(media_file, lambda x: x['Formats'], list) or []: 'MediaFiles', lambda _, v: v['RoleCodename'] == 'main',
fmt_url = url_or_none(try_get(media, lambda x: x['Url'])) 'Formats', lambda _, v: url_or_none(v['Url']))):
if not fmt_url: fmt_url = media['Url']
continue ext = determine_ext(fmt_url)
ext = determine_ext(fmt_url) if ext == 'm3u8':
if ext == 'm3u8': fmts, subs = self._extract_m3u8_formats_and_subtitles(
formats_, subs_ = self._extract_m3u8_formats_and_subtitles( fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False)
fmt_url, video_id, m3u8_id='hls', ext='mp4', fatal=False) elif ext == 'mpd':
elif ext == 'mpd': fmts, subs = self._extract_mpd_formats_and_subtitles(
formats_, subs_ = self._extract_mpd_formats_and_subtitles( fmt_url, video_id, mpd_id='dash', fatal=False)
fmt_url, video_id, mpd_id='dash', fatal=False) else:
else: formats.append({
formats.append({ 'url': fmt_url,
'url': fmt_url, 'format_id': str_or_none(media.get('Id')),
'format_id': str_or_none(media.get('Id')), })
}) continue
continue formats.extend(fmts)
formats.extend(formats_) self._merge_subtitles(subs, target=subtitles)
self._merge_subtitles(subs_, target=subs)
return formats, subs return formats, subtitles
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View file

@ -294,37 +294,37 @@ class ESPNCricInfoIE(InfoExtractor):
class WatchESPNIE(AdobePassIE): class WatchESPNIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})' _VALID_URL = r'https?://(?:www\.)?espn\.com/(?:watch|espnplus)/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
_TESTS = [{ _TESTS = [{
'url': 'https://www.espn.com/watch/player/_/id/dbbc6b1d-c084-4b47-9878-5f13c56ce309', 'url': 'https://www.espn.com/watch/player/_/id/11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'info_dict': { 'info_dict': {
'id': 'dbbc6b1d-c084-4b47-9878-5f13c56ce309', 'id': '11ce417a-6ac9-42b6-8a15-46aeb9ad5710',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Huddersfield vs. Burnley', 'title': 'Abilene Chrstn vs. Texas Tech',
'duration': 7500, 'duration': 14166,
'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/dbbc6b1d-c084-4b47-9878-5f13c56ce309/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs', 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/11ce417a-6ac9-42b6-8a15-46aeb9ad5710/16x9.jpg?timestamp=202407252343&showBadge=true&cb=12&package=ESPN_PLUS',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'https://www.espn.com/watch/player/_/id/a049a56e-a7ce-477e-aef3-c7e48ef8221c', 'url': 'https://www.espn.com/watch/player/_/id/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'info_dict': { 'info_dict': {
'id': 'a049a56e-a7ce-477e-aef3-c7e48ef8221c', 'id': '90a2c85d-75e0-4b1e-a878-8e428a3cb2f3',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Dynamo Dresden vs. VfB Stuttgart (Round #1) (German Cup)', 'title': 'UC Davis vs. California',
'duration': 8335, 'duration': 9547,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS', 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/90a2c85d-75e0-4b1e-a878-8e428a3cb2f3/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'https://www.espn.com/espnplus/player/_/id/317f5fd1-c78a-4ebe-824a-129e0d348421', 'url': 'https://www.espn.com/watch/player/_/id/c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'info_dict': { 'info_dict': {
'id': '317f5fd1-c78a-4ebe-824a-129e0d348421', 'id': 'c4313bbe-95b5-4bb8-b251-ac143ea0fc54',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Wheel - Episode 10', 'title': 'The College Football Show',
'duration': 3352, 'duration': 3639,
'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/317f5fd1-c78a-4ebe-824a-129e0d348421/16x9.jpg?timestamp=202205031523&showBadge=true&cb=12&package=ESPN_PLUS', 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/c4313bbe-95b5-4bb8-b251-ac143ea0fc54/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -353,6 +353,13 @@ class WatchESPNIE(AdobePassIE):
if not cookie: if not cookie:
self.raise_login_required(method='cookies') self.raise_login_required(method='cookies')
jwt = self._search_regex(r'=([^|]+)\|', cookie.value, 'cookie jwt')
id_token = self._download_json(
'https://registerdisney.go.com/jgc/v6/client/ESPN-ONESITE.WEB-PROD/guest/refresh-auth',
None, 'Refreshing token', headers={'Content-Type': 'application/json'}, data=json.dumps({
'refreshToken': json.loads(base64.urlsafe_b64decode(f'{jwt}==='))['refresh_token'],
}).encode())['data']['token']['id_token']
assertion = self._call_bamgrid_api( assertion = self._call_bamgrid_api(
'devices', video_id, 'devices', video_id,
headers={'Content-Type': 'application/json; charset=UTF-8'}, headers={'Content-Type': 'application/json; charset=UTF-8'},
@ -371,7 +378,7 @@ class WatchESPNIE(AdobePassIE):
})['access_token'] })['access_token']
assertion = self._call_bamgrid_api( assertion = self._call_bamgrid_api(
'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]}, 'accounts/grant', video_id, payload={'id_token': id_token},
headers={ headers={
'Authorization': token, 'Authorization': token,
'Content-Type': 'application/json; charset=UTF-8', 'Content-Type': 'application/json; charset=UTF-8',

View file

@ -3,7 +3,12 @@ from ..utils import traverse_obj
class EurosportIE(InfoExtractor): class EurosportIE(InfoExtractor):
_VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)' _VALID_URL = r'''(?x)
https?://(?:
(?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
eurosport\.tvn24\.pl
)/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml', 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
'info_dict': { 'info_dict': {
@ -70,6 +75,42 @@ class EurosportIE(InfoExtractor):
'duration': 105.0, 'duration': 105.0,
'upload_date': '20230518', 'upload_date': '20230518',
}, },
}, {
'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
'only_matching': True,
}, {
'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
'only_matching': True,
}, {
'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
'only_matching': True,
}] }]
_TOKEN = None _TOKEN = None
@ -77,6 +118,7 @@ class EurosportIE(InfoExtractor):
# actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 .. # actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
# but this method require to get sha256 hash # but this method require to get sha256 hash
_GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
_GEO_BYPASS = False
def _real_initialize(self): def _real_initialize(self):
if EurosportIE._TOKEN is None: if EurosportIE._TOKEN is None:
@ -98,13 +140,13 @@ class EurosportIE(InfoExtractor):
for stream_type in json_data['attributes']['streaming']: for stream_type in json_data['attributes']['streaming']:
if stream_type == 'hls': if stream_type == 'hls':
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4') traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
elif stream_type == 'dash': elif stream_type == 'dash':
fmts, subs = self._extract_mpd_formats_and_subtitles( fmts, subs = self._extract_mpd_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id) traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
elif stream_type == 'mss': elif stream_type == 'mss':
fmts, subs = self._extract_ism_formats_and_subtitles( fmts, subs = self._extract_ism_formats_and_subtitles(
traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id) traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)

View file

@ -84,7 +84,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1692346159, 'timestamp': 1692346159,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670', 'uploader_id': '100063551323670',
'duration': 3132.184, 'duration': 3133.583,
'view_count': int, 'view_count': int,
'concurrent_view_count': 0, 'concurrent_view_count': 0,
}, },
@ -112,9 +112,10 @@ class FacebookIE(InfoExtractor):
'upload_date': '20140506', 'upload_date': '20140506',
'timestamp': 1399398998, 'timestamp': 1399398998,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid028wxorhX2ErLFJ578N6P3crHD3PHmXTCqCvfBpsnbSLmbokwSY75p5hWBjHGkG4zxl', 'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
'duration': 131.03, 'duration': 131.03,
'concurrent_view_count': int, 'concurrent_view_count': int,
'view_count': int,
}, },
}, { }, {
'note': 'Video with DASH manifest', 'note': 'Video with DASH manifest',
@ -167,7 +168,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params # have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': 'ca63897a90c9452efee5f8c40d080e25', 'md5': '1659aa21fb3dd1585874f668e81a72c8',
'info_dict': { 'info_dict': {
'id': '10155529876156509', 'id': '10155529876156509',
'ext': 'mp4', 'ext': 'mp4',
@ -180,9 +181,10 @@ class FacebookIE(InfoExtractor):
'view_count': int, 'view_count': int,
'uploader_id': '100059479812265', 'uploader_id': '100059479812265',
'concurrent_view_count': int, 'concurrent_view_count': int,
'duration': 44.478, 'duration': 44.181,
}, },
}, { }, {
# FIXME: unable to extract uploader, no formats found
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/', 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
@ -241,9 +243,9 @@ class FacebookIE(InfoExtractor):
'timestamp': 1511548260, 'timestamp': 1511548260,
'upload_date': '20171124', 'upload_date': '20171124',
'uploader': 'Vickie Gentry', 'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FuZhHCeWDAxWxEbr3yKPFaRstXvRxgsp9uCPG6GjD4J2AitB35NUAuJ4Q75KcjiDl', 'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'duration': 148.435, 'duration': 148.224,
}, },
}, { }, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media # data.node.comet_sections.content.story.attachments[].styles.attachment.media
@ -271,7 +273,7 @@ class FacebookIE(InfoExtractor):
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...', 'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader': 'Lela Evans', 'uploader': 'Lela Evans',
'uploader_id': 'pfbid0shZJipuigyy5mqrUJn9ub5LJFWNHvan5prtyi3LrDuuuJ4NwrURgnQHYR9fywBepl', 'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl',
'upload_date': '20231228', 'upload_date': '20231228',
'timestamp': 1703804085, 'timestamp': 1703804085,
'duration': 394.347, 'duration': 394.347,
@ -322,7 +324,7 @@ class FacebookIE(InfoExtractor):
'upload_date': '20180523', 'upload_date': '20180523',
'uploader': 'ESL One Dota 2', 'uploader': 'ESL One Dota 2',
'uploader_id': '100066514874195', 'uploader_id': '100066514874195',
'duration': 4524.212, 'duration': 4524.001,
'view_count': int, 'view_count': int,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'concurrent_view_count': int, 'concurrent_view_count': int,
@ -339,9 +341,9 @@ class FacebookIE(InfoExtractor):
'title': 'Josef', 'title': 'Josef',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'concurrent_view_count': int, 'concurrent_view_count': int,
'uploader_id': 'pfbid0cibUN6tV7DYgdbJdsUFN46wc4jKpVSPAvJQhFofGqBGmVn3V3JtAs2tfUwziw2hUl', 'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl',
'timestamp': 1549275572, 'timestamp': 1549275572,
'duration': 3.413, 'duration': 3.283,
'uploader': 'Josef Novak', 'uploader': 'Josef Novak',
'description': '', 'description': '',
'upload_date': '20190204', 'upload_date': '20190204',
@ -396,6 +398,7 @@ class FacebookIE(InfoExtractor):
'playlist_count': 1, 'playlist_count': 1,
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, { }, {
# FIXME: Cannot parse data error
# data.event.cover_media_renderer.cover_video # data.event.cover_media_renderer.cover_video
'url': 'https://m.facebook.com/events/1509582499515440', 'url': 'https://m.facebook.com/events/1509582499515440',
'info_dict': { 'info_dict': {
@ -498,7 +501,8 @@ class FacebookIE(InfoExtractor):
or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name'])) or get_first(post, ('video', 'creation_story', 'attachments', ..., 'media', lambda k, v: k == 'owner' and v['name']))
or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name']))
or get_first(post, ('node', 'actors', ..., {dict})) or get_first(post, ('node', 'actors', ..., {dict}))
or get_first(post, ('event', 'event_creator', {dict})) or {}) or get_first(post, ('event', 'event_creator', {dict}))
or get_first(post, ('video', 'creation_story', 'short_form_video_context', 'video_owner', {dict})) or {})
uploader = uploader_data.get('name') or ( uploader = uploader_data.get('name') or (
clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
or self._search_regex( or self._search_regex(
@ -524,6 +528,11 @@ class FacebookIE(InfoExtractor):
webpage, 'view count', default=None)), webpage, 'view count', default=None)),
'concurrent_view_count': get_first(post, ( 'concurrent_view_count': get_first(post, (
('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})), ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})),
**traverse_obj(post, (lambda _, v: video_id in v['url'], 'feedback', {
'like_count': ('likers', 'count', {int}),
'comment_count': ('total_comment_count', {int}),
'repost_count': ('share_count_reduced', {parse_count}),
}), get_all=False),
} }
info_json_ld = self._search_json_ld(webpage, video_id, default={}) info_json_ld = self._search_json_ld(webpage, video_id, default={})
@ -571,16 +580,21 @@ class FacebookIE(InfoExtractor):
# Formats larger than ~500MB will return error 403 unless chunk size is regulated # Formats larger than ~500MB will return error 403 unless chunk size is regulated
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20 f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
def extract_relay_data(_filter): def yield_all_relay_data(_filter):
return self._parse_json(self._search_regex( for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
rf'data-sjs>({{.*?{_filter}.*?}})</script>', yield self._parse_json(relay_data, video_id, fatal=False) or {}
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
def extract_relay_prefetched_data(_filter): def extract_relay_data(_filter):
return traverse_obj(extract_relay_data(_filter), ( return next(filter(None, yield_all_relay_data(_filter)), {})
'require', (None, (..., ..., ..., '__bbox', 'require')),
def extract_relay_prefetched_data(_filter, target_keys=None):
path = 'data'
if target_keys is not None:
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
return traverse_obj(yield_all_relay_data(_filter), (
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v), lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {} ..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
if not video_data: if not video_data:
server_js_data = self._parse_json(self._search_regex([ server_js_data = self._parse_json(self._search_regex([
@ -591,7 +605,8 @@ class FacebookIE(InfoExtractor):
if not video_data: if not video_data:
data = extract_relay_prefetched_data( data = extract_relay_prefetched_data(
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)') r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
if data: if data:
entries = [] entries = []
@ -926,18 +941,21 @@ class FacebookReelIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387', 'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'f13dd37f2633595982db5ed8765474d3', 'md5': 'a53256d10fc2105441fe0c4212ed8cea',
'info_dict': { 'info_dict': {
'id': '1195289147628387', 'id': '1195289147628387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e', 'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$',
'description': 'md5:22f03309b216ac84720183961441d8db', 'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$',
'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1', 'uploader': 'Beast Camp Training',
'uploader_id': '100040874179269', 'uploader_id': '100040874179269',
'duration': 9.579, 'duration': 9.579,
'timestamp': 1637502609, 'timestamp': 1637502609,
'upload_date': '20211121', 'upload_date': '20211121',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'like_count': int,
'comment_count': int,
'repost_count': int,
}, },
}] }]
@ -957,6 +975,7 @@ class FacebookAdsIE(InfoExtractor):
'id': '899206155126718', 'id': '899206155126718',
'ext': 'mp4', 'ext': 'mp4',
'title': 'video by Kandao', 'title': 'video by Kandao',
'description': 'md5:0822724069e3aca97cbed5dabbab282e',
'uploader': 'Kandao', 'uploader': 'Kandao',
'uploader_id': '774114102743284', 'uploader_id': '774114102743284',
'uploader_url': r're:^https?://.*', 'uploader_url': r're:^https?://.*',
@ -965,6 +984,22 @@ class FacebookAdsIE(InfoExtractor):
'upload_date': '20231214', 'upload_date': '20231214',
'like_count': int, 'like_count': int,
}, },
}, {
# key 'watermarked_video_sd_url' missing
'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
'info_dict': {
'id': '501152689226254',
'ext': 'mp4',
'title': 'video by mat.nawrocki',
'description': 'md5:02a446ace7ff8c3c37a2892922492490',
'uploader': 'mat.nawrocki',
'uploader_id': '148586968341456',
'uploader_url': r're:^https?://.*',
'timestamp': 1723452305,
'thumbnail': r're:^https?://.*',
'upload_date': '20240812',
'like_count': int,
},
}, { }, {
'url': 'https://www.facebook.com/ads/library/?id=893637265423481', 'url': 'https://www.facebook.com/ads/library/?id=893637265423481',
'info_dict': { 'info_dict': {
@ -1011,34 +1046,42 @@ class FacebookAdsIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
post_data = [self._parse_json(j, video_id, fatal=False) post_data = traverse_obj(
for j in re.findall(r's\.handle\(({.*})\);requireLazy\(', webpage)] re.findall(r'data-sjs>({.*?ScheduledServerJS.*?})</script>', webpage), (..., {json.loads}))
data = traverse_obj(post_data, ( data = get_first(post_data, (
..., 'require', ..., ..., ..., 'props', 'deeplinkAdCard', 'snapshot', {dict}), get_all=False) 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ...,
'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict}))
if not data: if not data:
raise ExtractorError('Unable to extract ad data') raise ExtractorError('Unable to extract ad data')
title = data.get('title') title = data.get('title')
if not title or title == '{{product.name}}': if not title or title == '{{product.name}}':
title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data) title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data)
markup_id = traverse_obj(data, ('body', '__m', {str}))
markup = traverse_obj(post_data, (
..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id),
..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any))
info_dict = traverse_obj(data, { info_dict = merge_dicts({
'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}), 'title': title,
'description': markup or None,
}, traverse_obj(data, {
'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}),
'uploader': ('page_name', {str}), 'uploader': ('page_name', {str}),
'uploader_id': ('page_id', {str_or_none}), 'uploader_id': ('page_id', {str_or_none}),
'uploader_url': ('page_profile_uri', {url_or_none}), 'uploader_url': ('page_profile_uri', {url_or_none}),
'timestamp': ('creation_time', {int_or_none}), 'timestamp': ('creation_time', {int_or_none}),
'like_count': ('page_like_count', {int_or_none}), 'like_count': ('page_like_count', {int_or_none}),
}) }))
entries = [] entries = []
for idx, entry in enumerate(traverse_obj( for idx, entry in enumerate(traverse_obj(
data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1, data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1,
): ):
entries.append({ entries.append({
'id': f'{video_id}_{idx}', 'id': f'{video_id}_{idx}',
'title': entry.get('title') or title, 'title': entry.get('title') or title,
'description': entry.get('link_description') or info_dict.get('description'), 'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'),
'thumbnail': url_or_none(entry.get('video_preview_image_url')), 'thumbnail': url_or_none(entry.get('video_preview_image_url')),
'formats': self._extract_formats(entry), 'formats': self._extract_formats(entry),
}) })

View file

@ -14,7 +14,7 @@ from ..utils import (
class FC2IE(InfoExtractor): class FC2IE(InfoExtractor):
_VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)' _VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P<id>[^/]+)'
IE_NAME = 'fc2' IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2' _NETRC_MACHINE = 'fc2'
_TESTS = [{ _TESTS = [{

View file

@ -2340,7 +2340,7 @@ class GenericIE(InfoExtractor):
default_search = 'fixup_error' default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'): if default_search in ('auto', 'auto_warning', 'fixup_error'):
if re.match(r'^[^\s/]+\.[^\s/]+/', url): if re.match(r'[^\s/]+\.[^\s/]+/', url):
self.report_warning('The url doesn\'t specify the protocol, trying with http') self.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url) return self.url_result('http://' + url)
elif default_search != 'fixup_error': elif default_search != 'fixup_error':
@ -2400,7 +2400,7 @@ class GenericIE(InfoExtractor):
# Check for direct link to a video # Check for direct link to a video
content_type = full_response.headers.get('Content-Type', '').lower() content_type = full_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type) m = re.match(r'(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m: if m:
self.report_detected('direct video link') self.report_detected('direct video link')
headers = filter_dict({'Referer': smuggled_data.get('referer')}) headers = filter_dict({'Referer': smuggled_data.get('referer')})

View file

@ -0,0 +1,91 @@
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
parse_qs,
traverse_obj,
url_or_none,
)
class GermanupaIE(InfoExtractor):
    # Handles germanupa.de "mediathek" pages. Video pages embed a Vimeo player
    # through an oEmbed iframe; pages without that iframe are handed to the
    # generic extractor (see the audio test case below).
    IE_DESC = 'germanupa.de'
    _VALID_URL = r'https?://germanupa\.de/mediathek/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen',
        'info_dict': {
            'id': '909179246',
            'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen',
            'ext': 'mp4',
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280',
            'uploader_url': 'https://vimeo.com/germanupa',
            'duration': 3987,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'audio, uses GenericIE',
        'url': 'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable',
        'info_dict': {
            'id': '1867346676',
            'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX',
            'ext': 'opus',
            'timestamp': 1720545088,
            'upload_date': '20240709',
            'duration': 3910.557,
            'like_count': int,
            'description': 'md5:db2aed5ff131e177a7b33901e9a8db05',
            'uploader': 'German UPA',
            'repost_count': int,
            'genres': ['Science'],
            'license': 'all-rights-reserved',
            'uploader_url': 'https://soundcloud.com/user-80097677',
            'uploader_id': '471579486',
            'view_count': int,
            'comment_count': int,
            'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg',
        },
    }, {
        'note': 'Nur für Mitglieder/Just for members',
        'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai',
        'info_dict': {
            'id': '986994430',
            'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber',
            'ext': 'mp4',
            'release_date': '20240719',
            'uploader_url': 'https://vimeo.com/germanupa',
            'timestamp': 1721373980,
            'license': 'by-sa',
            'like_count': int,
            'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280',
            'duration': 2146,
            'release_timestamp': 1721373980,
            'uploader': 'German UPA',
            'uploader_id': 'germanupa',
            'upload_date': '20240719',
            'comment_count': int,
        },
        'expected_warnings': ['Failed to parse XML: not well-formed'],
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Resolve a mediathek page to its embedded Vimeo video.

        Returns a ``url_result`` pointing at VimeoIE when the oEmbed iframe is
        found, otherwise a ``url_result`` that re-dispatches the same page URL
        to the 'Generic' extractor.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The iframe's data-src is the site's own oEmbed endpoint; the actual
        # media URL is carried in its ``url`` query parameter.
        oembed_src = self._search_regex(
            r'<iframe[^>]+data-src\s*?=\s*?([\'"])(?P<url>https://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1',
            webpage, 'embedded video', default=None, group='url')
        param_url = traverse_obj(oembed_src, ({parse_qs}, 'url', 0, {url_or_none}))

        if param_url:
            # Point Vimeo watch URLs at the player host and hand the page URL
            # to VimeoIE._smuggle_referrer alongside it.
            real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/')
            return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id)

        # No embed found: a login-wrapper <div> marks members-only content.
        login_wrapper = self._search_regex(
            r'<div[^>]+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1',
            webpage, 'login wrapper', default=None)
        if login_wrapper:
            self.raise_login_required('This video is only available for members')
        return self.url_result(url, 'Generic')  # Fall back to generic to extract audio

View file

@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor):
_BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
_VALID_URL = [ _VALID_URL = [
rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)', rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)', rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
] ]
_TESTS = [{ _TESTS = [{
'url': 'http://academymel.online/3video_1', 'url': 'http://academymel.online/3video_1',

View file

@ -7,7 +7,7 @@ from ..utils import (
class GolemIE(InfoExtractor): class GolemIE(InfoExtractor):
_VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/' _VALID_URL = r'https?://video\.golem\.de/.+?/(?P<id>.+?)/'
_TEST = { _TEST = {
'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html', 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
'md5': 'c1a2c0a3c863319651c7c992c5ee29bf', 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',

View file

@ -13,7 +13,7 @@ from ..utils import (
class HRFernsehenIE(InfoExtractor): class HRFernsehenIE(InfoExtractor):
IE_NAME = 'hrfernsehen' IE_NAME = 'hrfernsehen'
_VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html' _VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
_TESTS = [{ _TESTS = [{
'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html', 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
'md5': '5c4e0ba94677c516a2f65a84110fc536', 'md5': '5c4e0ba94677c516a2f65a84110fc536',

View file

@ -8,15 +8,19 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration,
str_or_none, str_or_none,
try_get, try_get,
unescapeHTML, unescapeHTML,
unified_strdate,
update_url_query, update_url_query,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class HuyaLiveIE(InfoExtractor): class HuyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)' _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
IE_NAME = 'huya:live' IE_NAME = 'huya:live'
IE_DESC = 'huya.com' IE_DESC = 'huya.com'
TESTS = [{ TESTS = [{
@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '572329', 'id': '572329',
'title': str, 'title': str,
'ext': 'flv',
'description': str, 'description': str,
'is_live': True, 'is_live': True,
'view_count': int, 'view_count': int,
@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor):
fm = base64.b64decode(params['fm']).decode().split('_', 1)[0] fm = base64.b64decode(params['fm']).decode().split('_', 1)[0]
ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']])) ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']]))
return fm, ss return fm, ss
class HuyaVideoIE(InfoExtractor):
    # Handles huya.com/video/play/<numeric id>.html pages via the
    # liveapi.huya.com "getMomentContent" endpoint.
    _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P<id>\d+)\.html'
    IE_NAME = 'huya:video'
    IE_DESC = '虎牙视频'

    _TESTS = [{
        'url': 'https://www.huya.com/video/play/1002412640.html',
        'info_dict': {
            'id': '1002412640',
            'ext': 'mp4',
            'title': '8月3日',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 14,
            'uploader': '虎牙-ATS欧卡车队青木',
            'uploader_id': '1564376151',
            'upload_date': '20240803',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }, {
        'url': 'https://www.huya.com/video/play/556054543.html',
        'info_dict': {
            'id': '556054543',
            'ext': 'mp4',
            'title': '我不挑事 也不怕事',
            'thumbnail': r're:https?://.*\.jpg',
            'duration': 1864,
            'uploader': '卡尔',
            'uploader_id': '367138632',
            'upload_date': '20210811',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
        },
    }]

    def _real_extract(self, url: str):
        """Fetch the video's metadata JSON and build the info dict.

        The response is indexed directly at data -> moment -> videoInfo, so a
        response missing any of those keys raises instead of returning a
        partial result.
        """
        video_id = self._match_id(url)
        api_response = self._download_json(
            'https://liveapi.huya.com/moment/getMomentContent', video_id,
            query={'videoId': video_id})
        video_data = api_response['data']['moment']['videoInfo']

        # Only definitions that carry a usable URL become formats; the
        # remaining per-format fields are optional.
        usable_definitions = traverse_obj(
            video_data, ('definitions', lambda _, v: url_or_none(v['url'])))
        formats = [{
            'url': definition['url'],
            **traverse_obj(definition, {
                'format_id': ('defName', {str}),
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
                'filesize': ('size', {int_or_none}),
            }),
        } for definition in usable_definitions]

        metadata = traverse_obj(video_data, {
            'title': ('videoTitle', {str}),
            'thumbnail': ('videoCover', {url_or_none}),
            'duration': ('videoDuration', {parse_duration}),
            'uploader': ('nickName', {str}),
            'uploader_id': ('uid', {str_or_none}),
            'upload_date': ('videoUploadTime', {unified_strdate}),
            'view_count': ('videoPlayNum', {int_or_none}),
            'comment_count': ('videoCommentNum', {int_or_none}),
            'like_count': ('favorCount', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }

View file

@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor):
'id': 'p51388', 'id': 'p51388',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Partička (92)', 'title': 'Partička (92)',
'description': 'md5:859d53beae4609e6dd7796413f1b6cac', 'description': 'md5:57943f6a50d6188288c3a579d2fd5f01',
'upload_date': '20201103', 'episode': 'Partička (92)',
'timestamp': 1604437480, 'season': 'Partička',
'series': 'Prima Partička',
'episode_number': 92,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg',
},
'params': {
'skip_download': True, # m3u8 download
},
}, {
'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne',
'info_dict': {
'id': 'p1412199',
'ext': 'mp4',
'episode_number': 3,
'episode': 'Tenerife: V říši ohně',
'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c',
'duration': 3111.0,
'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768',
'title': 'Tenerife: V říši ohně',
'timestamp': 1711825800,
'upload_date': '20240330',
}, },
'params': { 'params': {
'skip_download': True, # m3u8 download 'skip_download': True, # m3u8 download
@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor):
video_id = self._search_regex(( video_id = self._search_regex((
r'productId\s*=\s*([\'"])(?P<id>p\d+)\1', r'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1', r'pproduct_id\s*=\s*([\'"])(?P<id>p\d+)\1',
r'let\s+videos\s*=\s*([\'"])(?P<id>p\d+)\1',
), webpage, 'real id', group='id', default=None) ), webpage, 'real id', group='id', default=None)
if not video_id: if not video_id:
@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor):
final_result = self._search_json_ld(webpage, video_id, default={}) final_result = self._search_json_ld(webpage, video_id, default={})
final_result.update({ final_result.update({
'id': video_id, 'id': video_id,
'title': title, 'title': final_result.get('title') or title,
'thumbnail': self._html_search_meta( 'thumbnail': self._html_search_meta(
['thumbnail', 'og:image', 'twitter:image'], ['thumbnail', 'og:image', 'twitter:image'],
webpage, 'thumbnail', default=None), webpage, 'thumbnail', default=None),

View file

@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE):
class SangiinInstructionIE(InfoExtractor): class SangiinInstructionIE(InfoExtractor):
_VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php' _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php'
IE_DESC = False # this shouldn't be listed as a supported site IE_DESC = False # this shouldn't be listed as a supported site
def _real_extract(self, url): def _real_extract(self, url):
raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True) raise ExtractorError(
'Copy the link from the button below the video description/player '
'and use that link to download. If there is no button in the frame, '
'get the URL of the frame showing the video.', expected=True)
class SangiinIE(InfoExtractor): class SangiinIE(InfoExtractor):

View file

@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor):
(?: (?:
kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?| kaltura:(?P<partner_id>\w+):(?P<id>\w+)(?::(?P<player_type>\w+))?|
https?:// https?://
(:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
(?: (?:
(?: (?:
# flash player # flash player

View file

@ -15,7 +15,7 @@ from ..utils import (
class KhanAcademyBaseIE(InfoExtractor): class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70' _PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4'
def _parse_video(self, video): def _parse_video(self, video):
return { return {
@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor):
query={ query={
'fastly_cacheable': 'persist_until_publish', 'fastly_cacheable': 'persist_until_publish',
'pcv': self._PUBLISHED_CONTENT_VERSION, 'pcv': self._PUBLISHED_CONTENT_VERSION,
'hash': '1242644265', 'hash': '3712657851',
'variables': json.dumps({ 'variables': json.dumps({
'path': display_id, 'path': display_id,
'countryCode': 'US', 'countryCode': 'US',

View file

@ -1,9 +1,14 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
from ..utils import ( from ..utils import (
UserNotLive, UserNotLive,
determine_ext,
float_or_none, float_or_none,
int_or_none,
merge_dicts, merge_dicts,
parse_iso8601,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
unified_timestamp, unified_timestamp,
@ -25,104 +30,212 @@ class KickBaseIE(InfoExtractor):
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs): def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
return self._download_json( return self._download_json(
f'https://kick.com/api/v1/{path}', display_id, note=note, f'https://kick.com/api/{path}', display_id, note=note,
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs) headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
class KickIE(KickBaseIE): class KickIE(KickBaseIE):
IE_NAME = 'kick:live'
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://kick.com/yuppy', 'url': 'https://kick.com/buddha',
'info_dict': { 'info_dict': {
'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21', 'id': '92722911-nopixel-40',
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'description': str, 'description': str,
'channel': 'yuppy',
'channel_id': '33538',
'uploader': 'Yuppy',
'uploader_id': '33793',
'upload_date': str,
'live_status': 'is_live',
'timestamp': int, 'timestamp': int,
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:https?://.+\.jpg',
'categories': list, 'categories': list,
'upload_date': str,
'channel': 'buddha',
'channel_id': '32807',
'uploader': 'Buddha',
'uploader_id': '33057',
'live_status': 'is_live',
'concurrent_view_count': int,
'release_timestamp': int,
'age_limit': 18,
'release_date': str,
}, },
'skip': 'livestream', 'params': {'skip_download': 'livestream'},
# 'skip': 'livestream',
}, { }, {
'url': 'https://kick.com/kmack710', 'url': 'https://kick.com/xqc',
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
channel = self._match_id(url) channel = self._match_id(url)
response = self._call_api(f'channels/{channel}', channel) response = self._call_api(f'v2/channels/{channel}', channel)
if not traverse_obj(response, 'livestream', expected_type=dict): if not traverse_obj(response, 'livestream', expected_type=dict):
raise UserNotLive(video_id=channel) raise UserNotLive(video_id=channel)
return { return {
'id': str(traverse_obj(
response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
'formats': self._extract_m3u8_formats(
response['playback_url'], channel, 'mp4', live=True),
'title': traverse_obj(
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
'description': traverse_obj(response, ('user', 'bio')),
'channel': channel, 'channel': channel,
'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
'uploader': traverse_obj(response, 'name', ('user', 'username')),
'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
'is_live': True, 'is_live': True,
'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))), 'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
'thumbnail': traverse_obj( **traverse_obj(response, {
response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none), 'id': ('livestream', 'slug', {str}),
'categories': traverse_obj(response, ('recent_categories', ..., 'name')), 'title': ('livestream', 'session_title', {str}),
'description': ('user', 'bio', {str}),
'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
'uploader': (('name', ('user', 'username')), {str}, any),
'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
'timestamp': ('livestream', 'created_at', {unified_timestamp}),
'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
'categories': ('recent_categories', ..., 'name', {str}),
'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
}),
} }
class KickVODIE(KickBaseIE): class KickVODIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' IE_NAME = 'kick:vod'
_VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{ _TESTS = [{
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3', 'url': 'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'md5': '3870f94153e40e7121a6e46c068b70cb', 'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': { 'info_dict': {
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3', 'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea',
'ext': 'mp4', 'ext': 'mp4',
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠', 'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑',
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d', 'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
'channel': 'jaredfps', 'channel': 'xqc',
'channel_id': '26608', 'channel_id': '668',
'uploader': 'JaredFPS', 'uploader': 'xQc',
'uploader_id': '26799', 'uploader_id': '676',
'upload_date': '20240402', 'upload_date': '20240909',
'timestamp': 1712097108, 'timestamp': 1725919141,
'duration': 33859.0, 'duration': 10155.0,
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'categories': ['Call of Duty: Warzone'], 'view_count': int,
'categories': ['Just Chatting'],
'age_limit': 0,
}, },
'params': { 'params': {'skip_download': 'm3u8'},
'skip_download': 'm3u8',
},
'expected_warnings': [r'impersonation'],
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
response = self._call_api(f'video/{video_id}', video_id) response = self._call_api(f'v1/video/{video_id}', video_id)
return { return {
'id': video_id, 'id': video_id,
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'), 'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
'title': traverse_obj( **traverse_obj(response, {
response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), 'title': ('livestream', ('session_title', 'slug'), {str}, any),
'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')), 'description': ('livestream', 'channel', 'user', 'bio', {str}),
'channel': traverse_obj(response, ('livestream', 'channel', 'slug')), 'channel': ('livestream', 'channel', 'slug', {str}),
'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))), 'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')), 'uploader': ('livestream', 'channel', 'user', 'username', {str}),
'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))), 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
'timestamp': unified_timestamp(response.get('created_at')), 'timestamp': ('created_at', {parse_iso8601}),
'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000), 'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
'thumbnail': traverse_obj( 'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
response, ('livestream', 'thumbnail'), expected_type=url_or_none), 'categories': ('livestream', 'categories', ..., 'name', {str}),
'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')), 'view_count': ('views', {int_or_none}),
'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
}),
}
class KickClipIE(KickBaseIE):
    """Extractor for single Kick clips.

    Matches both the ``/<channel>/clips/<clip id>`` path form and the
    ``/<channel>?clip=<clip id>`` query form (see ``_VALID_URL``).
    """

    IE_NAME = 'kick:clips'
    _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?P<id>clip_[\w-]+)'
    _TESTS = [{
        'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
        'info_dict': {
            'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
            'ext': 'mp4',
            'title': 'Maddy detains Abd D:',
            'channel': 'mxddy',
            'channel_id': '133789',
            'uploader': 'AbdCreates',
            'uploader_id': '3309077',
            'thumbnail': r're:^https?://.*\.jpeg',
            'duration': 35,
            'timestamp': 1682481453,
            'upload_date': '20230426',
            'view_count': int,
            'like_count': int,
            'categories': ['VALORANT'],
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
        'info_dict': {
            'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
            'title': 'W jews',
            'ext': 'mp4',
            'channel': 'destiny',
            'channel_id': '1772249',
            'uploader': 'punished_furry',
            'uploader_id': '2027722',
            'duration': 49.0,
            'upload_date': '20230908',
            'timestamp': 1694150180,
            'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
            'view_count': int,
            'like_count': int,
            'categories': ['Just Chatting'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5',
        'info_dict': {
            'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5',
            'ext': 'mp4',
            'title': 'KLJASLDJKLJKASDLJKDAS',
            'channel': 'spreen',
            'channel_id': '5312671',
            'uploader': 'AnormalBarraBaja',
            'uploader_id': '26518262',
            'duration': 43.0,
            'upload_date': '20240927',
            'timestamp': 1727399987,
            'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp',
            'view_count': int,
            'like_count': int,
            'categories': ['Minecraft'],
            'age_limit': 0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for the clip addressed by *url*.

        The clip record is read from the ``v2/clips/<id>/play`` endpoint via
        ``_call_api``. A clip URL whose extension is ``m3u8`` is expanded into
        HLS formats; any other URL is exposed as a single direct format.

        Raises ``KeyError`` if the response lacks ``clip`` or ``clip_url``.
        """
        clip_id = self._match_id(url)
        play_response = self._call_api(f'v2/clips/{clip_id}/play', clip_id)
        clip_info = play_response['clip']
        media_url = clip_info['clip_url']

        formats = (
            self._extract_m3u8_formats(media_url, clip_id, 'mp4')
            if determine_ext(media_url) == 'm3u8'
            else [{'url': media_url}])

        metadata = traverse_obj(clip_info, {
            'title': ('title', {str}),
            'channel': ('channel', 'slug', {str}),
            'channel_id': ('channel', 'id', {int}, {str_or_none}),
            'uploader': ('creator', 'username', {str}),
            'uploader_id': ('creator', 'id', {int}, {str_or_none}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'duration': ('duration', {float_or_none}),
            'categories': ('category', 'name', {str}, all),
            'timestamp': ('created_at', {parse_iso8601}),
            'view_count': ('views', {int_or_none}),
            'like_count': ('likes', {int_or_none}),
            # Map the boolean maturity flag onto an age limit of 18 or 0
            'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
        })

        return {
            'id': clip_id,
            'formats': formats,
            **metadata,
        }

126
yt_dlp/extractor/kika.py Normal file
View file

@ -0,0 +1,126 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
url_or_none,
)
from ..utils.traversal import traverse_obj
class KikaIE(InfoExtractor):
    """Extractor for video pages on kika.de (``.../videos/<id>``)."""

    IE_DESC = 'KiKA.de'
    _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P<id>[a-z-]+\d+)'
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
        'md5': 'fbfc8da483719ef06f396e5e5b938c69',
        'info_dict': {
            'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100',
            'ext': 'mp4',
            'upload_date': '20240831',
            'timestamp': 1725126600,
            'season_number': 2024,
            'modified_date': '20240831',
            'episode': 'Episode 476',
            'episode_number': 476,
            'season': 'Season 2024',
            'duration': 634,
            'title': 'logo! vom Samstag, 31. August 2024',
            'modified_timestamp': 1725129983,
        },
    }, {
        'url': 'https://www.kika.de/kaltstart/videos/video92498',
        'md5': '710ece827e5055094afeb474beacb7aa',
        'info_dict': {
            'id': 'video92498',
            'ext': 'mp4',
            'title': '7. Wo ist Leo?',
            'description': 'md5:fb48396a5b75068bcac1df74f1524920',
            'duration': 436,
            'timestamp': 1702926876,
            'upload_date': '20231218',
            'episode_number': 7,
            'modified_date': '20240319',
            'modified_timestamp': 1710880610,
            'episode': 'Episode 7',
            'season_number': 1,
            'season': 'Season 1',
        },
    }, {
        'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088',
        'md5': 'ffd1b700d7de0a6616a1d08544c77294',
        'info_dict': {
            'id': 'video90088',
            'ext': 'mp4',
            'upload_date': '20221102',
            'timestamp': 1667390580,
            'duration': 197,
            'modified_timestamp': 1711093771,
            'episode_number': 8,
            'title': 'Es ist nicht leicht, ein Astrobrot zu sein',
            'modified_date': '20240322',
            'description': 'md5:d3641deaf1b5515a160788b2be4159a9',
            'season_number': 1,
            'episode': 'Episode 8',
            'season': 'Season 1',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        Two JSON documents are fetched: the video record from the
        ``_next-api/proxy/v1/videos`` endpoint, and the asset listing that
        record points to under ``assets.url``. Subtitle URLs found in the asset
        listing are all filed under the ``de`` language key.

        Raises ``KeyError`` if the video record has no ``assets.url``.
        """
        video_id = self._match_id(url)

        video_record = self._download_json(
            f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id)
        asset_listing = self._download_json(video_record['assets']['url'], video_id)

        # Collect TTML first, then WebVTT; the language key is only created
        # when at least one subtitle URL is valid
        german_tracks = []
        ttml_url = url_or_none(asset_listing.get('videoSubtitle'))
        if ttml_url:
            german_tracks.append({'url': ttml_url, 'ext': 'ttml'})
        webvtt_url = url_or_none(asset_listing.get('webvttUrl'))
        if webvtt_url:
            german_tracks.append({'url': webvtt_url, 'ext': 'vtt'})
        subtitles = {'de': german_tracks} if german_tracks else {}

        metadata = traverse_obj(video_record, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('date', {parse_iso8601}),
            'modified_timestamp': ('modificationDate', {parse_iso8601}),
            'duration': ((
                ('durationInSeconds', {int_or_none}),
                ('duration', {parse_duration})), any),
            'episode_number': ('episodeNumber', {int_or_none}),
            'season_number': ('season', {int_or_none}),
        })

        return {
            'id': video_id,
            'formats': list(self._extract_formats(asset_listing, video_id)),
            'subtitles': subtitles,
            **metadata,
        }

    def _extract_formats(self, media_info, video_id):
        """Yield format dicts for every entry of ``media_info['assets']`` with a valid URL.

        ``m3u8`` URLs are expanded into HLS formats (non-fatally); every other
        URL becomes one progressive format whose ``format_id`` is its extension.
        """
        # NB: filesize is 0 if unknown, bitrate is -1 if unknown
        def known_filesize(value):
            return value or None

        def known_bitrate(value):
            return None if value == -1 else value

        for asset in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))):
            asset_url = asset['url']
            extension = determine_ext(asset_url)

            if extension != 'm3u8':
                yield {
                    'url': asset_url,
                    'format_id': extension,
                    **traverse_obj(asset, {
                        'width': ('frameWidth', {int_or_none}),
                        'height': ('frameHeight', {int_or_none}),
                        'filesize': ('fileSize', {int_or_none}, {known_filesize}),
                        'abr': ('bitrateAudio', {int_or_none}, {known_bitrate}),
                        'vbr': ('bitrateVideo', {int_or_none}, {known_bitrate}),
                    }),
                }
                continue

            yield from self._extract_m3u8_formats(
                asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)

View file

@ -0,0 +1,78 @@
import functools
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_id,
join_nonempty,
parse_duration,
unified_timestamp,
)
from ..utils.traversal import traverse_obj
class LearningOnScreenIE(InfoExtractor):
    """Extractor for on-demand programme pages on learningonscreen.ac.uk.

    Extraction is refused unless a ``PHPSESSID-BOB-LIVE`` cookie for the site
    is present (checked in ``_real_initialize``).
    """

    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        """Require the site's session cookie before any extraction is attempted."""
        site_cookies = self._get_cookies('https://learningonscreen.ac.uk/')
        if site_cookies.get('PHPSESSID-BOB-LIVE'):
            return
        self.raise_login_required(
            'Use --cookies for authentication. See '
            ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
            'for how to manually pass cookies', method=None)

    def _real_extract(self, url):
        """Return a single video, or a playlist when the page embeds several media elements.

        Title, timestamp and duration are scraped from the ``programme-details``
        element; the title falls back to the ``data-record-title`` attribute of
        the ``add-to-existing-playlist`` element.

        Raises ``ExtractorError`` when the page contains no HTML5 media entries.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        details = traverse_obj(webpage, (
            {functools.partial(get_element_html_by_id, 'programme-details')}, {
                'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
                'timestamp': (
                    {functools.partial(get_element_by_class, 'broadcast-date')},
                    {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
                'duration': (
                    {functools.partial(get_element_by_class, 'prog-running-time')},
                    {clean_html}, {parse_duration}),
            }))

        # The title is taken out of `details` so it is not merged in again below
        title = details.pop('title', None)
        if not title:
            title = traverse_obj(webpage, (
                {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
                {extract_attributes}, 'data-record-title', {clean_html}))

        entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
            _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
        if not entries:
            raise ExtractorError('No video found')

        if len(entries) == 1:
            return {
                **entries[0],
                **details,
                'id': video_id,
                'title': title,
            }

        # Several media elements: the scraped duration is attached to the
        # playlist, and each part gets a numbered id and title
        playlist_duration = details.pop('duration', None)
        for part_number, part in enumerate(entries, start=1):
            part.update(
                details,
                id=join_nonempty(video_id, part_number),
                title=join_nonempty(title, part_number))

        return self.playlist_result(entries, video_id, title, duration=playlist_duration)

View file

@ -1,86 +1,11 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
format_field, format_field,
int_or_none, int_or_none,
parse_iso8601,
unified_strdate, unified_strdate,
) )
class LnkGoIE(InfoExtractor):
    """Extractor for lnkgo.lt / lnk.lt video pages backed by the lnk.lt video-page API."""

    _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
    _TESTS = [{
        'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
        'info_dict': {
            'id': '10809',
            'ext': 'mp4',
            'title': "Put'ka: Trys Klausimai",
            'upload_date': '20161216',
            'description': 'Seniai matytas Putka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
            'age_limit': 18,
            'duration': 117,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1481904000,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
        'info_dict': {
            'id': '10467',
            'ext': 'mp4',
            'title': 'Nėrdas: Kompiuterio Valymas',
            'upload_date': '20150113',
            'description': 'md5:7352d113a242a808676ff17e69db6a69',
            'age_limit': 18,
            'duration': 346,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1421164800,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }, {
        'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
        'only_matching': True,
    }]

    # Maps the API's `pgRating` value to an age limit; other ratings yield 0
    _AGE_LIMITS = {
        'N-7': 7,
        'N-14': 14,
        'S': 18,
    }
    _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The slug and optional episode number from the URL are sent to the
        video-page API (``0`` stands in for a missing episode number). The HLS
        playlist URL is built from ``_M3U8_TEMPL`` with a ``smil`` prefix when
        the API reports ``isQualityChangeAvailable`` and ``mp4`` otherwise.

        Raises ``KeyError`` if the API response lacks ``id``, ``title`` or
        ``videoUrl``.
        """
        display_id, episode_id = self._match_valid_url(url).groups()

        api_url = 'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, episode_id or '0')
        page_data = self._download_json(api_url, display_id)
        video_info = page_data['videoConfig']['videoInfo']

        video_id = str(video_info['id'])
        title = video_info['title']

        if video_info.get('isQualityChangeAvailable'):
            prefix = 'smil'
        else:
            prefix = 'mp4'
        token_params = video_info.get('secureTokenParams') or ''
        playlist_url = self._M3U8_TEMPL % (prefix, video_info['videoUrl'], token_params)
        formats = self._extract_m3u8_formats(playlist_url, video_id, 'mp4', 'm3u8_native')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': format_field(video_info, 'posterImage', 'https://lnk.lt/all-images/%s'),
            'duration': int_or_none(video_info.get('duration')),
            'description': clean_html(video_info.get('htmlDescription')),
            'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
            'timestamp': parse_iso8601(video_info.get('airDate')),
            'view_count': int_or_none(video_info.get('viewsCount')),
        }
class LnkIE(InfoExtractor): class LnkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P<id>\d+)'

View file

@ -92,9 +92,9 @@ class LoomIE(InfoExtractor):
}, },
'params': {'videopassword': 'seniorinfants2'}, 'params': {'videopassword': 'seniorinfants2'},
}, { }, {
# embed, transcoded-url endpoint sends empty JSON response # embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e', 'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
'md5': '8488817242a0db1cb2ad0ea522553cf6', 'md5': 'b321d261656848c184a94e3b93eae28d',
'info_dict': { 'info_dict': {
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e', 'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
'ext': 'mp4', 'ext': 'mp4',
@ -104,6 +104,7 @@ class LoomIE(InfoExtractor):
'timestamp': 1657216459, 'timestamp': 1657216459,
'duration': 181, 'duration': 181,
}, },
'params': {'format': 'bestvideo'}, # Test video-only fixup
'expected_warnings': ['Failed to parse JSON'], 'expected_warnings': ['Failed to parse JSON'],
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
@ -293,7 +294,11 @@ class LoomIE(InfoExtractor):
format_url = format_url.replace('-split.m3u8', '.m3u8') format_url = format_url.replace('-split.m3u8', '.m3u8')
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality) format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
# Sometimes only split video/audio formats are available, need to fixup video-only formats
is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec'))
for fmt in m3u8_formats: for fmt in m3u8_formats:
if is_not_premerged and fmt.get('vcodec') != 'none':
fmt['acodec'] = 'none'
yield { yield {
**fmt, **fmt,
'url': update_url(fmt['url'], query=query), 'url': update_url(fmt['url'], query=query),

View file

@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor):
video_data = None video_data = None
# fix meta_url if missing the host address # fix meta_url if missing the host address
if re.match(r'^\/\+\/', meta_url): if re.match(r'\/\+\/', meta_url):
meta_url = urljoin('https://my.mail.ru', meta_url) meta_url = urljoin('https://my.mail.ru', meta_url)
if meta_url: if meta_url:

View file

@ -13,8 +13,8 @@ from ..utils import (
class MDRIE(InfoExtractor): class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA' IE_DESC = 'MDR.DE'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' _VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_GEO_COUNTRIES = ['DE'] _GEO_COUNTRIES = ['DE']
@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
'uploader': 'MITTELDEUTSCHER RUNDFUNK', 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
}, },
'skip': '404 not found', 'skip': '404 not found',
}, {
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
'id': '19636',
'ext': 'mp4',
'title': 'Baumhaus vom 30. Oktober 2015',
'duration': 134,
'uploader': 'KIKA',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
'info_dict': {
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
'timestamp': 1482541200,
'upload_date': '20161224',
'duration': 4628,
'uploader': 'KIKA',
},
}, { }, {
# audio with alternative playerURL pattern # audio with alternative playerURL pattern
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
'duration': 3239, 'duration': 3239,
'uploader': 'MITTELDEUTSCHER RUNDFUNK', 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
}, },
}, { 'skip': '404 not found',
# empty bitrateVideo and bitrateAudio
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
'info_dict': {
'id': '128372',
'ext': 'mp4',
'title': 'Der kleine Wichtel kehrt zurück',
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
'duration': 4876,
'timestamp': 1607823300,
'upload_date': '20201213',
'uploader': 'ZDF',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True,
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
}, { }, {
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html', 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
'only_matching': True, 'only_matching': True,

View file

@ -16,6 +16,15 @@ class MediaKlikkIE(InfoExtractor):
(?P<id>[^/#?_]+)''' (?P<id>[^/#?_]+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
'info_dict': {
'id': '668177',
'title': 'Az ajtó',
'display_id': 'az-ajto',
'ext': 'mp4',
'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
},
}, {
# (old) mediaklikk. date in html. # (old) mediaklikk. date in html.
'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/', 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
'info_dict': { 'info_dict': {
@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230903', 'upload_date': '20230903',
'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg', 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
}, },
'skip': 'Webpage redirects to 404 page',
}, { }, {
# (old) m4sport # (old) m4sport
'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/', 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230908', 'upload_date': '20230908',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg', 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
}, },
'skip': 'Webpage redirects to 404 page',
}, { }, {
# m4sport with *video/ url and no date # m4sport with *video/ url and no date
'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/', 'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png', 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
}, },
'skip': 'Webpage redirects to 404 page',
}, { }, {
# (old) hirado # (old) hirado
'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/', 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230911', 'upload_date': '20230911',
'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg', 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
}, },
'skip': 'Webpage redirects to video list page',
}, { }, {
# (old) petofilive # (old) petofilive
'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/', 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor):
'upload_date': '20230909', 'upload_date': '20230909',
'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg', 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
}, },
'skip': 'Webpage redirects to video list page',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -133,7 +147,9 @@ class MediaKlikkIE(InfoExtractor):
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None)) r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
player_data['video'] = player_data.pop('token') player_data['video'] = player_data.pop('token')
player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data) player_page = self._download_webpage(
'https://player.mediaklikk.hu/playernew/player.php', video_id,
query=player_data, headers={'Referer': url})
player_json = self._search_json( player_json = self._search_json(
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);') r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
playlist_url = traverse_obj( playlist_url = traverse_obj(
@ -141,14 +157,14 @@ class MediaKlikkIE(InfoExtractor):
if not playlist_url: if not playlist_url:
raise ExtractorError('Unable to extract playlist url') raise ExtractorError('Unable to extract playlist url')
formats = self._extract_wowza_formats( formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'upload_date': upload_date, 'upload_date': upload_date,
'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage), 'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
} }

View file

@ -16,7 +16,7 @@ from ..utils import (
class MGTVIE(InfoExtractor): class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html' _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV' IE_DESC = '芒果TV'
IE_NAME = 'MangoTV' IE_NAME = 'MangoTV'

View file

@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor):
class OCWMITIE(InfoExtractor): class OCWMITIE(InfoExtractor):
IE_NAME = 'ocw.mit.edu' IE_NAME = 'ocw.mit.edu'
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)' _VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
_BASE_URL = 'http://ocw.mit.edu/' _BASE_URL = 'http://ocw.mit.edu/'
_TESTS = [ _TESTS = [

View file

@ -1,16 +1,21 @@
import json
import re import re
import urllib.parse import time
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
jwt_decode_hs256,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
try_get, try_get,
url_or_none, url_or_none,
urlencode_postdata,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
@ -276,81 +281,225 @@ class MLBVideoIE(MLBBaseIE):
class MLBTVIE(InfoExtractor): class MLBTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})' _VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
_NETRC_MACHINE = 'mlb' _NETRC_MACHINE = 'mlb'
_TESTS = [{ _TESTS = [{
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638', 'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
'info_dict': { 'info_dict': {
'id': '661581', 'id': '661581',
'ext': 'mp4', 'ext': 'mp4',
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies', 'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
'release_date': '20220702',
'release_timestamp': 1656792300,
}, },
'params': { 'params': {'skip_download': 'm3u8'},
'skip_download': True, }, {
# makeup game: has multiple dates, need to avoid games with 'rescheduleDate'
'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e',
'info_dict': {
'id': '747039',
'ext': 'mp4',
'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles',
'release_date': '20240729',
'release_timestamp': 1722280200,
}, },
'params': {'skip_download': 'm3u8'},
}] }]
_GRAPHQL_INIT_QUERY = '''\
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
initSession(device: $device, clientType: $clientType, experience: $experience) {
deviceId
sessionId
entitlements {
code
}
location {
countryCode
regionName
zipCode
latitude
longitude
}
clientExperience
features
}
}'''
_GRAPHQL_PLAYBACK_QUERY = '''\
mutation initPlaybackSession(
$adCapabilities: [AdExperienceType]
$mediaId: String!
$deviceId: String!
$sessionId: String!
$quality: PlaybackQuality
) {
initPlaybackSession(
adCapabilities: $adCapabilities
mediaId: $mediaId
deviceId: $deviceId
sessionId: $sessionId
quality: $quality
) {
playbackSessionId
playback {
url
token
expiration
cdn
}
}
}'''
_APP_VERSION = '7.8.2'
_device_id = None
_session_id = None
_access_token = None _access_token = None
_token_expiry = 0
@property
def _api_headers(self):
if (self._token_expiry - 120) <= time.time():
self.write_debug('Access token has expired; re-logging in')
self._perform_login(*self._get_login_info())
return {'Authorization': f'Bearer {self._access_token}'}
def _real_initialize(self): def _real_initialize(self):
if not self._access_token: if not self._access_token:
self.raise_login_required( self.raise_login_required(
'All videos are only available to registered users', method='password') 'All videos are only available to registered users', method='password')
def _set_device_id(self, username):
if not self._device_id:
self._device_id = self.cache.load(
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
if self._device_id:
return
self._device_id = str(uuid.uuid4())
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
def _perform_login(self, username, password): def _perform_login(self, username, password):
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356' try:
access_token = self._download_json( self._access_token = self._download_json(
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None, 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
headers={ 'Logging in', 'Unable to log in', headers={
'User-Agent': 'okhttp/3.12.1', 'User-Agent': 'okhttp/3.12.1',
'Content-Type': 'application/x-www-form-urlencoded', 'Content-Type': 'application/x-www-form-urlencoded',
}, data=data.encode())['access_token'] }, data=urlencode_postdata({
'grant_type': 'password',
'username': username,
'password': password,
'scope': 'openid offline_access',
'client_id': '0oa3e1nutA1HLzAKG356',
}))['access_token']
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
raise ExtractorError('Invalid username or password', expected=True)
raise
entitlement = self._download_webpage( self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None, self._set_device_id(username)
headers={
'User-Agent': 'okhttp/3.12.1',
'Authorization': f'Bearer {access_token}',
})
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv' self._session_id = self._call_api({
self._access_token = self._download_json( 'operationName': 'initSession',
'https://us.edge.bamgrid.com/token', None, 'query': self._GRAPHQL_INIT_QUERY,
'variables': {
'device': {
'appVersion': self._APP_VERSION,
'deviceFamily': 'desktop',
'knownDeviceId': self._device_id,
'languagePreference': 'ENGLISH',
'manufacturer': '',
'model': '',
'os': '',
'osVersion': '',
},
'clientType': 'WEB',
},
}, None, 'session ID')['data']['initSession']['sessionId']
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
return self._download_json(
'https://media-gateway.mlb.com/graphql', video_id,
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
headers={ headers={
**self._api_headers,
'Accept': 'application/json', 'Accept': 'application/json',
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk', 'Content-Type': 'application/json',
'Content-Type': 'application/x-www-form-urlencoded', 'x-client-name': 'WEB',
}, data=data.encode())['access_token'] 'x-client-version': self._APP_VERSION,
}, data=json.dumps(data, separators=(',', ':')).encode())
def _extract_formats_and_subtitles(self, broadcast, video_id):
feed = traverse_obj(broadcast, ('homeAway', {str.title}))
medium = traverse_obj(broadcast, ('type', {str}))
language = traverse_obj(broadcast, ('language', {str.lower}))
format_id = join_nonempty(feed, medium, language)
response = self._call_api({
'operationName': 'initPlaybackSession',
'query': self._GRAPHQL_PLAYBACK_QUERY,
'variables': {
'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
'deviceId': self._device_id,
'mediaId': broadcast['mediaId'],
'quality': 'PLACEHOLDER',
'sessionId': self._session_id,
},
}, video_id, f'{format_id} broadcast JSON', fatal=False)
playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
token = traverse_obj(playback, ('token', {str}))
if not (m3u8_url and token):
errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
if 'not entitled' in errors:
raise ExtractorError(errors, expected=True)
elif errors: # Only warn when 'blacked out' since radio formats are available
self.report_warning(f'API returned errors for {format_id}: {errors}')
else:
self.report_warning(f'No formats available for {format_id} broadcast; skipping')
return [], {}
cdn_headers = {'x-cdn-token': token}
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
m3u8_id=format_id, fatal=False, headers=cdn_headers)
for fmt in fmts:
fmt['http_headers'] = cdn_headers
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
fmt.setdefault('language', language)
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
fmt['source_preference'] = 10
return fmts, subs
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
airings = self._download_json( data = self._download_json(
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D', 'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
video_id)['data']['Airings'] 'gamePk': video_id,
'hydrate': 'broadcasts(all),statusFlags',
})
metadata = traverse_obj(data, (
'dates', ..., 'games',
lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any))
broadcasts = traverse_obj(metadata, (
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
formats, subtitles = [], {} formats, subtitles = [], {}
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']): for broadcast in broadcasts:
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing) fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
m3u8_url = traverse_obj(self._download_json( formats.extend(fmts)
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id, self._merge_subtitles(subs, target=subtitles)
note=f'Downloading {format_id} stream info JSON',
errnote=f'Failed to download {format_id} stream info, skipping',
fatal=False, headers={
'Authorization': self._access_token,
'Accept': 'application/vnd.media-service+json; version=2',
}), ('stream', 'complete', {url_or_none}))
if not m3u8_url:
continue
f, s = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
formats.extend(f)
self._merge_subtitles(s, target=subtitles)
return { return {
'id': video_id, 'id': video_id,
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False), 'title': join_nonempty(
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE', traverse_obj(metadata, ('officialDate', {str})),
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
delim=' - '),
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
} }

View file

@ -0,0 +1,121 @@
from .common import InfoExtractor
from ..utils import js_to_json, remove_end, update_url_query
class MojevideoIE(InfoExtractor):
    """Extractor for video pages on mojevideo.sk."""

    IE_DESC = 'mojevideo.sk'
    _VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'
    _TESTS = [{
        'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
        'md5': '384a4628bd2bbd261c5206cf77c38c17',
        'info_dict': {
            'id': '3d17c',
            'ext': 'mp4',
            'title': 'Chlapci dobetónovali sme, máme hotovo!',
            'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
            'description': 'md5:a0822126044050d304a9ef58c92ddb34',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
            'duration': 21.0,
            'upload_date': '20230919',
            'timestamp': 1695129706,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
        'md5': '517c3e111c53a67d10b429c1f344ba2f',
        'info_dict': {
            'id': '14677',
            'ext': 'mp4',
            'title': 'Deň blbec?',
            'display_id': 'den_blbec',
            'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
            'duration': 100.0,
            'upload_date': '20120515',
            'timestamp': 1337076481,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
        'md5': '64599a23d3ac31cf2fe069e4353d8162',
        'info_dict': {
            'id': '2feb2',
            'ext': 'mp4',
            'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
            'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
            'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
            'duration': 240.0,
            'upload_date': '20190708',
            'timestamp': 1562576592,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
        'only_matching': True,
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the `vId`, `vEx` and `vHash` values found in the page."""
        video_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, video_id)

        # Decimal id used in the media URL; if the page does not expose `vId`,
        # fall back to reading the URL id as a hexadecimal number
        video_id_dec = self._search_regex(
            r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False)
        if not video_id_dec:
            video_id_dec = str(int(video_id, 16))
        video_exp = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
        video_hashes = self._search_json(
            r'\bvHash\s*=', webpage, 'video hashes', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)

        # (file suffix, quality rank, note) paired positionally with the hashes;
        # zip() stops at whichever of the two sequences is shorter
        variants = (
            ('', 1, 'normálna kvalita'),
            ('_lq', 0, 'nízka kvalita'),
            ('_hd', 2, 'HD-720p'),
            ('_fhd', 3, 'FULL HD-1080p'),
            ('_2k', 4, '2K-1440p'),
        )
        formats = [{
            'format_id': f'mp4-{quality}',
            'quality': quality,
            'format_note': format_note,
            'url': update_url_query(
                f'https://cache01.mojevideo.sk/securevideos69/{video_id_dec}{suffix}.mp4', {
                    'md5': video_hash,
                    'expires': video_exp,
                }),
        } for video_hash, (suffix, quality, format_note) in zip(video_hashes, variants)]

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')

        # JSON-LD fields are spread last, so they take precedence on key clashes
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
            **self._search_json_ld(webpage, video_id, default={}),
        }

View file

@ -40,7 +40,6 @@ class NiconicoIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215', 'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'info_dict': { 'info_dict': {
'id': 'sm22312215', 'id': 'sm22312215',
'ext': 'mp4', 'ext': 'mp4',
@ -56,8 +55,8 @@ class NiconicoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'genres': ['未設定'], 'genres': ['未設定'],
'tags': [], 'tags': [],
'expected_protocol': str,
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
# File downloaded with and without credentials are different, so omit # File downloaded with and without credentials are different, so omit
# the md5 field # the md5 field
@ -77,8 +76,8 @@ class NiconicoIE(InfoExtractor):
'view_count': int, 'view_count': int,
'genres': ['音楽・サウンド'], 'genres': ['音楽・サウンド'],
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'], 'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
'expected_protocol': str,
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
# 'video exists but is marked as "deleted" # 'video exists but is marked as "deleted"
# md5 is unstable # md5 is unstable
@ -112,7 +111,6 @@ class NiconicoIE(InfoExtractor):
}, { }, {
# video not available via `getflv`; "old" HTML5 video # video not available via `getflv`; "old" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm1151009', 'url': 'http://www.nicovideo.jp/watch/sm1151009',
'md5': 'f95a3d259172667b293530cc2e41ebda',
'info_dict': { 'info_dict': {
'id': 'sm1151009', 'id': 'sm1151009',
'ext': 'mp4', 'ext': 'mp4',
@ -128,11 +126,10 @@ class NiconicoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'genres': ['ゲーム'], 'genres': ['ゲーム'],
'tags': [], 'tags': [],
'expected_protocol': str,
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
# "New" HTML5 video # "New" HTML5 video
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm31464864', 'url': 'http://www.nicovideo.jp/watch/sm31464864',
'info_dict': { 'info_dict': {
'id': 'sm31464864', 'id': 'sm31464864',
@ -149,12 +146,11 @@ class NiconicoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'genres': ['アニメ'], 'genres': ['アニメ'],
'tags': [], 'tags': [],
'expected_protocol': str,
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
# Video without owner # Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488', 'url': 'http://www.nicovideo.jp/watch/sm18238488',
'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
'info_dict': { 'info_dict': {
'id': 'sm18238488', 'id': 'sm18238488',
'ext': 'mp4', 'ext': 'mp4',
@ -168,8 +164,8 @@ class NiconicoIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'genres': ['エンターテイメント'], 'genres': ['エンターテイメント'],
'tags': [], 'tags': [],
'expected_protocol': str,
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True, 'only_matching': True,
@ -424,7 +420,7 @@ class NiconicoIE(InfoExtractor):
'x-request-with': 'https://www.nicovideo.jp', 'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl'] })['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later # Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id) dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'): for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
@ -436,7 +432,6 @@ class NiconicoIE(InfoExtractor):
'asr': ('samplingRate', {int_or_none}), 'asr': ('samplingRate', {int_or_none}),
}), get_all=False), }), get_all=False),
'acodec': 'aac', 'acodec': 'aac',
'ext': 'm4a',
} }
# Sort before removing dupes to keep the format dicts with the lowest tbr # Sort before removing dupes to keep the format dicts with the lowest tbr
@ -458,9 +453,11 @@ class NiconicoIE(InfoExtractor):
if video_id.startswith('so'): if video_id.startswith('so'):
video_id = self._match_id(handle.url) video_id = self._match_id(handle.url)
api_data = self._parse_json(self._html_search_regex( api_data = traverse_obj(
'data-api-data="([^"]+)"', webpage, self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
'API data', default='{}'), video_id) ('data', 'response', {dict}))
if not api_data:
raise ExtractorError('Server response data not found')
except ExtractorError as e: except ExtractorError as e:
try: try:
api_data = self._download_json( api_data = self._download_json(

View file

@ -10,7 +10,7 @@ from ..utils import (
class NZOnScreenIE(InfoExtractor): class NZOnScreenIE(InfoExtractor):
_VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)' _VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982', 'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
'info_dict': { 'info_dict': {

View file

@ -1,9 +1,6 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
extract_attributes,
)
class NZZIE(InfoExtractor): class NZZIE(InfoExtractor):
@ -22,19 +19,14 @@ class NZZIE(InfoExtractor):
'playlist_count': 1, 'playlist_count': 1,
}] }]
def _entries(self, webpage, page_id):
    """Yield one entry per JW Player `<script>` block whose settings can be parsed."""
    script_re = r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>'
    for mobj in re.finditer(script_re, webpage):
        settings = self._search_json(
            r'var\s+settings\s*=[^{]*', mobj.group(1), 'settings', page_id, fatal=False)
        entry = self._parse_jwplayer_data(settings, page_id)
        # Falsy results are skipped rather than yielded
        if entry:
            yield entry
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
entries = [] return self.playlist_result(self._entries(webpage, page_id), page_id)
for player_element in re.findall(
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
player_params = extract_attributes(player_element)
if player_params.get('data-type') not in ('kaltura_singleArticle',):
self.report_warning('Unsupported player type')
continue
entry_id = player_params['data-id']
entries.append(self.url_result(
'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
return self.playlist_result(entries, page_id)

View file

@ -1,9 +1,19 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, try_get from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
parse_qs,
try_get,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class OlympicsReplayIE(InfoExtractor): class OlympicsReplayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)' _VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays', 'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
'info_dict': { 'info_dict': {
@ -11,26 +21,105 @@ class OlympicsReplayIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020', 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
'upload_date': '20210801', 'upload_date': '20210801',
'timestamp': 1627783200, 'timestamp': 1627797600,
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3', 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
'uploader': 'International Olympic Committee', 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2',
}, 'duration': 7017.0,
'params': {
'skip_download': True,
}, },
}, { }, {
'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp', 'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024',
'only_matching': True, 'info_dict': {
'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5',
'ext': 'mp4',
'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1',
'upload_date': '20240517',
'timestamp': 1715948200,
'description': 'md5:f63d728a41270ec628f6ac33ce471bb1',
'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1',
'duration': 1321.0,
},
}, {
'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024',
'info_dict': {
'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c',
'ext': 'mp4',
'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024',
'upload_date': '20240727',
'timestamp': 1722066600,
},
'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL',
}, {
'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace',
'info_dict': {
'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90',
'ext': 'mp4',
'title': 'md5:c7cfbc9918636a98e66400a812e4d407',
'upload_date': '20240729',
'timestamp': 1722288600,
},
}] }]
_GEO_BYPASS = False
def _extract_from_nextjs_data(self, webpage, video_id):
    """Extract video info from the page's Next.js data.

    Returns None when no 'videoPlaylist' item with a `currentVideo` dict is
    found, so that the caller can try another extraction path. Raises a
    geo-restriction error when the data flags the video as geo-restricted or
    when the manifest request fails with a 'georestricted' HTTP error.
    """
    nextjs_data = self._search_nextjs_data(webpage, video_id, default={})
    video = traverse_obj(nextjs_data, (
        'props', 'pageProps', 'page', 'items',
        lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any))
    if not video:
        return None

    countries = traverse_obj(video, ('countries', ..., {str}))
    if traverse_obj(video, ('geoRestrictedVideo', {bool})):
        self.raise_geo_restricted(countries=countries)

    is_live = traverse_obj(video, ('streamingStatus', {str})) == 'LIVE'
    # Prefer `videoUrl`; `streamUrl` is required when it is missing or invalid
    source_url = traverse_obj(video, ('videoUrl', {url_or_none})) or video['streamUrl']
    tokenized_url = self._tokenize_url(source_url, video['jwtToken'], is_live, video_id)

    try:
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            tokenized_url, video_id, 'mp4', m3u8_id='hls')
    except ExtractorError as error:
        cause = error.cause
        if isinstance(cause, HTTPError) and 'georestricted' in cause.msg:
            self.raise_geo_restricted(countries=countries)
        raise

    metadata = traverse_obj(video, {
        'id': ('videoID', {str}),
        'title': ('title', {str}),
        'timestamp': ('contentDate', {parse_iso8601}),
    })
    return {
        'formats': formats,
        'subtitles': subtitles,
        'is_live': is_live,
        **metadata,
    }
def _tokenize_url(self, url, token, is_live, video_id):
    """Request a tokenized m3u8 URL from the metering token generator.

    The query string of `url` is forwarded as individual query parameters,
    while the URL without its query is passed as the `url` parameter.
    """
    token_query = {
        **parse_qs(url),
        'url': update_url(url, query=None),
        'service-id': 'live' if is_live else 'vod',
        'user-auth': token,
    }
    response = self._download_json(
        'https://metering.olympics.com/tokengenerator', video_id,
        'Downloading tokenized m3u8 url', query=token_query)
    return response['data']['url']
def _legacy_tokenize_url(self, url, video_id):
    """Return the JSON response of the legacy token generator for `url`."""
    legacy_query = {'url': url}
    return self._download_json(
        'https://olympics.com/tokenGenerator', video_id,
        'Downloading legacy tokenized m3u8 url', query=legacy_query)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if info := self._extract_from_nextjs_data(webpage, video_id):
return info
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage) title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
uuid = self._html_search_meta('episode_uid', webpage) video_uuid = self._html_search_meta('episode_uid', webpage)
m3u8_url = self._html_search_meta('video_url', webpage) m3u8_url = self._html_search_meta('video_url', webpage)
json_ld = self._search_json_ld(webpage, uuid) json_ld = self._search_json_ld(webpage, video_uuid)
thumbnails_list = json_ld.get('image') thumbnails_list = json_ld.get('image')
if not thumbnails_list: if not thumbnails_list:
thumbnails_list = self._html_search_regex( thumbnails_list = self._html_search_regex(
@ -48,12 +137,12 @@ class OlympicsReplayIE(InfoExtractor):
'width': width, 'width': width,
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)), 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
}) })
m3u8_url = self._download_json(
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') formats, subtitles = self._extract_m3u8_formats_and_subtitles(
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls') self._legacy_tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls')
return { return {
'id': uuid, 'id': video_uuid,
'title': title, 'title': title,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'formats': formats,

View file

@ -420,7 +420,7 @@ class PatreonIE(PatreonBaseIE):
class PatreonCampaignIE(PatreonBaseIE): class PatreonCampaignIE(PatreonBaseIE):
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))' _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m|api/campaigns)/(?P<campaign_id>\d+)|(?P<vanity>[-\w]+))'
_TESTS = [{ _TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/', 'url': 'https://www.patreon.com/dissonancepod/',
'info_dict': { 'info_dict': {
@ -442,25 +442,44 @@ class PatreonCampaignIE(PatreonBaseIE):
'url': 'https://www.patreon.com/m/4767637/posts', 'url': 'https://www.patreon.com/m/4767637/posts',
'info_dict': { 'info_dict': {
'title': 'Not Just Bikes', 'title': 'Not Just Bikes',
'channel_follower_count': int,
'id': '4767637', 'id': '4767637',
'channel_id': '4767637', 'channel_id': '4767637',
'channel_url': 'https://www.patreon.com/notjustbikes', 'channel_url': 'https://www.patreon.com/notjustbikes',
'description': 'md5:595c6e7dca76ae615b1d38c298a287a1', 'description': 'md5:9f4b70051216c4d5c58afe580ffc8d0f',
'age_limit': 0, 'age_limit': 0,
'channel': 'Not Just Bikes', 'channel': 'Not Just Bikes',
'uploader_url': 'https://www.patreon.com/notjustbikes', 'uploader_url': 'https://www.patreon.com/notjustbikes',
'uploader': 'Not Just Bikes', 'uploader': 'Jason',
'uploader_id': '37306634', 'uploader_id': '37306634',
'thumbnail': r're:^https?://.*$', 'thumbnail': r're:^https?://.*$',
}, },
'playlist_mincount': 71, 'playlist_mincount': 71,
}, {
'url': 'https://www.patreon.com/api/campaigns/4243769/posts',
'info_dict': {
'title': 'Second Thought',
'channel_follower_count': int,
'id': '4243769',
'channel_id': '4243769',
'channel_url': 'https://www.patreon.com/secondthought',
'description': 'md5:69c89a3aba43efdb76e85eb023e8de8b',
'age_limit': 0,
'channel': 'Second Thought',
'uploader_url': 'https://www.patreon.com/secondthought',
'uploader': 'JT Chapman',
'uploader_id': '32718287',
'thumbnail': r're:^https?://.*$',
},
'playlist_mincount': 201,
}, { }, {
'url': 'https://www.patreon.com/dissonancepod/posts', 'url': 'https://www.patreon.com/dissonancepod/posts',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.patreon.com/m/5932659', 'url': 'https://www.patreon.com/m/5932659',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.patreon.com/api/campaigns/4243769',
'only_matching': True,
}] }]
@classmethod @classmethod

View file

@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor):
class PinterestIE(PinterestBaseIE): class PinterestIE(PinterestBaseIE):
_VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)' _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# formats found in data['videos'] # formats found in data['videos']
'url': 'https://www.pinterest.com/pin/664281013778109217/', 'url': 'https://www.pinterest.com/pin/664281013778109217/',
@ -174,6 +174,25 @@ class PinterestIE(PinterestBaseIE):
}, { }, {
'url': 'https://co.pinterest.com/pin/824721750502199491/', 'url': 'https://co.pinterest.com/pin/824721750502199491/',
'only_matching': True, 'only_matching': True,
},
{
'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
'info_dict': {
'id': '2885187256207927',
'ext': 'mp4',
'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
'uploader_id': '1142999717836434688',
'upload_date': '20240702',
'timestamp': 1719939156,
'duration': 7.967,
'comment_count': int,
'repost_count': int,
'categories': 'count:9',
'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -628,8 +628,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page_entries = self._extract_entries(webpage, host) page_entries = self._extract_entries(webpage, host)
if not page_entries: if not page_entries:
break break
for e in page_entries: yield from page_entries
yield e
if not self._has_more(webpage): if not self._has_more(webpage):
break break

View file

@ -7,6 +7,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
join_nonempty,
time_seconds, time_seconds,
try_call, try_call,
unified_timestamp, unified_timestamp,
@ -167,7 +168,7 @@ class RadikoBaseIE(InfoExtractor):
class RadikoIE(RadikoBaseIE): class RadikoIE(RadikoBaseIE):
_VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
_TESTS = [{ _TESTS = [{
# QRR (文化放送) station provides <desc> # QRR (文化放送) station provides <desc>
@ -183,8 +184,9 @@ class RadikoIE(RadikoBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
station, video_id = self._match_valid_url(url).groups() station, timestring = self._match_valid_url(url).group('station', 'timestring')
vid_int = unified_timestamp(video_id, False) video_id = join_nonempty(station, timestring)
vid_int = unified_timestamp(timestring, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
auth_token, area_id = self._auth_client() auth_token, area_id = self._auth_client()
@ -207,7 +209,7 @@ class RadikoIE(RadikoBaseIE):
'ft': radio_begin, 'ft': radio_begin,
'end_at': radio_end, 'end_at': radio_end,
'to': radio_end, 'to': radio_end,
'seek': video_id, 'seek': timestring,
}, },
), ),
} }

View file

@ -16,7 +16,7 @@ from ..utils import (
class RadioFranceIE(InfoExtractor): class RadioFranceIE(InfoExtractor):
_VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' _VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)'
IE_NAME = 'radiofrance' IE_NAME = 'radiofrance'
_TEST = { _TEST = {

View file

@ -6,7 +6,7 @@ from ..utils import (
class ReverbNationIE(InfoExtractor): class ReverbNationIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$'
_TESTS = [{ _TESTS = [{
'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa',
'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645',

View file

@ -8,7 +8,7 @@ from ..utils import js_to_json
class RTPIE(InfoExtractor): class RTPIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
'md5': 'e736ce0c665e459ddb818546220b4ef8', 'md5': 'e736ce0c665e459ddb818546220b4ef8',
@ -19,9 +19,25 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo', 'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
}, },
}, {
'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
'info_dict': {
'id': 'e757904',
'ext': 'mp4',
'title': '25 Curiosidades, 25 de Abril',
'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
'thumbnail': r're:^https?://.*\.jpg',
},
}, { }, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
'only_matching': True,
}, {
'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
'only_matching': True,
}] }]
_RX_OBFUSCATION = re.compile(r'''(?xs) _RX_OBFUSCATION = re.compile(r'''(?xs)
@ -49,17 +65,17 @@ class RTPIE(InfoExtractor):
f, config = self._search_regex( f, config = self._search_regex(
r'''(?sx) r'''(?sx)
var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s* (?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/) var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
''', webpage, ''', webpage,
'player config', group=('f', 'config')) 'player config', group=('f', 'config'))
f = self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
config = self._parse_json( config = self._parse_json(
config, video_id, config, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id)) lambda data: self.__unobfuscate(data, video_id=video_id))
f = config['file'] if not f else self._parse_json(
f, video_id,
lambda data: self.__unobfuscate(data, video_id=video_id))
formats = [] formats = []
if isinstance(f, dict): if isinstance(f, dict):

View file

@ -8,14 +8,17 @@ from ..utils import (
UnsupportedError, UnsupportedError,
clean_html, clean_html,
determine_ext, determine_ext,
extract_attributes,
format_field, format_field,
get_element_by_class, get_element_by_class,
get_elements_html_by_class,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
parse_count, parse_count,
parse_iso8601, parse_iso8601,
traverse_obj, traverse_obj,
unescapeHTML, unescapeHTML,
urljoin,
) )
@ -382,8 +385,10 @@ class RumbleChannelIE(InfoExtractor):
if isinstance(e.cause, HTTPError) and e.cause.status == 404: if isinstance(e.cause, HTTPError) and e.cause.status == 404:
break break
raise raise
for video_url in re.findall(r'class="[^>"]*videostream__link[^>]+href="([^"]+\.html)"', webpage): for video_url in traverse_obj(
yield self.url_result('https://rumble.com' + video_url) get_elements_html_by_class('videostream__link', webpage), (..., {extract_attributes}, 'href'),
):
yield self.url_result(urljoin('https://rumble.com', video_url))
def _real_extract(self, url): def _real_extract(self, url):
url, playlist_id = self._match_valid_url(url).groups() url, playlist_id = self._match_valid_url(url).groups()

View file

@ -6,6 +6,7 @@ from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
parse_qs, parse_qs,
traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
@ -80,6 +81,8 @@ class RutubeBaseIE(InfoExtractor):
'url': format_url, 'url': format_url,
'format_id': format_id, 'format_id': format_id,
}) })
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
return formats return formats
def _download_and_extract_formats(self, video_id, query=None): def _download_and_extract_formats(self, video_id, query=None):
@ -90,7 +93,7 @@ class RutubeBaseIE(InfoExtractor):
class RutubeIE(RutubeBaseIE): class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube' IE_NAME = 'rutube'
IE_DESC = 'Rutube videos' IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/(?:video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})' _VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
_TESTS = [{ _TESTS = [{
@ -164,6 +167,29 @@ class RutubeIE(RutubeBaseIE):
'uploader': 'Стас Быков', 'uploader': 'Стас Быков',
}, },
'expected_warnings': ['Unable to download f4m'], 'expected_warnings': ['Unable to download f4m'],
}, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': {
'id': 'c58f502c7bb34a8fcdd976b221fca292',
'ext': 'mp4',
'categories': ['Телепередачи'],
'description': '',
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
'live_status': 'is_live',
'age_limit': 0,
'uploader_id': '23460655',
'timestamp': 1652972968,
'view_count': int,
'upload_date': '20220519',
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader': 'Первый канал',
},
}, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True,
}, {
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
'only_matching': True,
}] }]
@classmethod @classmethod

View file

@ -36,7 +36,7 @@ class SampleFocusIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id, impersonate=True)
sample_id = self._search_regex( sample_id = self._search_regex(
r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)', r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)',
@ -82,7 +82,15 @@ class SampleFocusIE(InfoExtractor):
return { return {
'id': sample_id, 'id': sample_id,
'title': title, 'title': title,
'url': mp3_url, 'formats': [{
'url': mp3_url,
'ext': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
'http_headers': {
'Referer': url,
},
}],
'display_id': display_id, 'display_id': display_id,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': uploader, 'uploader': uploader,

View file

@ -0,0 +1,33 @@
from .common import InfoExtractor
class ScreenRecIE(InfoExtractor):
    # Handles ScreenRec share pages of the form screenrec.com/share/<10-char id>
    _VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
    _TESTS = [{
        'url': 'https://screenrec.com/share/DasLtbknYo',
        'info_dict': {
            'id': 'DasLtbknYo',
            'ext': 'mp4',
            'title': '02.05.2024_03.01.25_REC',
            'description': 'Recorded with ScreenRec',
            'thumbnail': r're:^https?://.*\.gif$',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        # Fetch the share page; everything below is scraped from its HTML
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # The manifest URL is the quoted value assigned to `customUrl` in the page
        manifest_url = self._search_regex(
            r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
            page, 'm3u8 URL', group='url')

        # Prefer the OpenGraph title; fall back to the HTML <title> when it is missing/empty
        title = self._og_search_title(page, default=None)
        if not title:
            title = self._html_extract_title(page)

        description = self._og_search_description(page)
        thumbnail = self._og_search_thumbnail(page)
        formats = self._extract_m3u8_formats(manifest_url, video_id, ext='mp4')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }

36
yt_dlp/extractor/sen.py Normal file
View file

@ -0,0 +1,36 @@
from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj
class SenIE(InfoExtractor):
    # Handles sen.com video pages identified by a hex/dash id
    _VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
    _TEST = {
        'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
            'ext': 'mp4',
            'description': 'Florida, 28 Sep 2022',
            'title': 'Hurricane Ian',
            'tags': ['North America', 'Storm', 'Weather'],
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        api_data = self._download_json(
            f'https://api.sen.com/content/public/video/{video_id}', video_id)

        # Take the manifest URL from the node whose id is 'player' ...
        m3u8_url = traverse_obj(api_data, (
            'data', 'nodes', lambda _, node: node['id'] == 'player',
            'video', 'url', {url_or_none}, any))
        # ... otherwise fall back to the manifest location built from the video id
        if not m3u8_url:
            m3u8_url = f'https://vod.sen.com/videos/{video_id}/manifest.m3u8'

        formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')

        # Metadata is read from the 'content' of the node whose id is 'details'
        detail_fields = {
            'title': ('title', 'text', {str}),
            'description': ('descriptions', 0, 'text', {str}),
            'tags': ('badges', ..., 'text', {str}),
        }
        metadata = traverse_obj(api_data, (
            'data', 'nodes', lambda _, node: node['id'] == 'details',
            any, 'content', detail_fields))

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }

View file

@ -27,7 +27,7 @@ class ServusIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'AA-28BYCQNH92111', 'id': 'AA-28BYCQNH92111',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Klettersteige in den Alpen', 'title': 'Vie Ferrate - Klettersteige in den Alpen',
'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce', 'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'duration': 2823, 'duration': 2823,
@ -38,6 +38,7 @@ class ServusIE(InfoExtractor):
'season_number': 11, 'season_number': 11,
'episode': 'Episode 8 - Vie Ferrate Klettersteige in den Alpen', 'episode': 'Episode 8 - Vie Ferrate Klettersteige in den Alpen',
'episode_number': 8, 'episode_number': 8,
'categories': ['Bergwelten'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -71,8 +72,11 @@ class ServusIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url).upper() video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
next_data = self._search_nextjs_data(webpage, video_id, fatal=False)
video = self._download_json( video = self._download_json(
'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin', 'https://api-player.redbull.com/stv/servus-tv-playnet',
video_id, 'Downloading video JSON', query={'videoId': video_id}) video_id, 'Downloading video JSON', query={'videoId': video_id})
if not video.get('videoUrl'): if not video.get('videoUrl'):
self._report_errors(video) self._report_errors(video)
@ -89,7 +93,7 @@ class ServusIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video.get('title'), 'title': video.get('title'),
'description': self._get_description(video_id) or video.get('description'), 'description': self._get_description(next_data) or video.get('description'),
'thumbnail': video.get('poster'), 'thumbnail': video.get('poster'),
'duration': float_or_none(video.get('duration')), 'duration': float_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('currentSunrise')), 'timestamp': unified_timestamp(video.get('currentSunrise')),
@ -100,16 +104,19 @@ class ServusIE(InfoExtractor):
'episode_number': episode_number, 'episode_number': episode_number,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(next_data, ('props', 'pageProps', 'data', {
'title': ('title', 'rendered', {str}),
'timestamp': ('stv_date', 'raw', {int}),
'duration': ('stv_duration', {float_or_none}),
'categories': ('category_names', ..., {str}),
})),
} }
def _get_description(self, video_id): def _get_description(self, next_data):
info = self._download_json( return join_nonempty(*traverse_obj(next_data, (
f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page', 'props', 'pageProps', 'data',
video_id, fatal=False) ('stv_short_description', 'stv_long_description'), {str},
{lambda x: x.replace('\n\n', '\n')}, {unescapeHTML})), delim='\n\n')
return join_nonempty(*traverse_obj(info, (
('stv_short_description', 'stv_long_description'),
{lambda x: unescapeHTML(x.replace('\n\n', '\n'))})), delim='\n\n')
def _report_errors(self, video): def _report_errors(self, video):
playability_errors = traverse_obj(video, ('playabilityErrors', ...)) playability_errors = traverse_obj(video, ('playabilityErrors', ...))

View file

@ -0,0 +1,76 @@
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, url_or_none
from ..utils.traversal import traverse_obj
class SnapchatSpotlightIE(InfoExtractor):
    # Handles Snapchat Spotlight pages: snapchat.com/spotlight/<id>
    _VALID_URL = r'https?://(?:www\.)?snapchat\.com/spotlight/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
        'md5': '46c580f63592d0cbb76e974d2f9f0fcc',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYYWtidGhudGZpAX1TKn0JAX1TKnXJAAAAAA',
            'ext': 'mp4',
            'title': 'Views 💕',
            'description': '',
            'thumbnail': r're:https://cf-st\.sc-cdn\.net/d/kKJHIR1QAznRKK9jgYYDq\.256\.IRZXSOY',
            'duration': 4.665,
            'timestamp': 1637777831.369,
            'upload_date': '20211124',
            'repost_count': int,
            'uploader': 'shreypatel57',
            'uploader_url': 'https://www.snapchat.com/add/shreypatel57',
        },
    }, {
        'url': 'https://www.snapchat.com/spotlight/W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
        'md5': '4cd9626458c1a0e3e6dbe72c544a9ec2',
        'info_dict': {
            'id': 'W7_EDlXWTBiXAEEniNoMPwAAYcnVjYWdwcGV1AZEaIYn5AZEaIYnrAAAAAQ',
            'ext': 'mp4',
            'title': 'Spotlight Snap',
            'description': 'How he flirt her teacher🤭🤭🤩😍 #kdrama#cdrama #dramaclips #dramaspotlight',
            'thumbnail': r're:https://cf-st\.sc-cdn\.net/i/ztfr6xFs0FOcFhwVczWfj\.256\.IRZXSOY',
            'duration': 10.91,
            'timestamp': 1722720291.307,
            'upload_date': '20240803',
            'view_count': int,
            'repost_count': int,
            'uploader': 'ganda0535',
            'uploader_url': 'https://www.snapchat.com/add/ganda0535',
            'tags': ['#dramaspotlight', '#dramaclips', '#cdrama', '#kdrama'],
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)
        page_props = nextjs_data['props']['pageProps']

        def is_requested_story(_, entry):
            # Select the feed entry whose story id equals the id from the URL
            return entry['story']['storyId']['value'] == video_id

        def ms_to_seconds(value):
            # The source fields are named *Ms; divide by 1000
            return float_or_none(value, 1000)

        def discard_minus_one(count):
            # A count of -1 is reported as None instead
            return None if count == -1 else count

        # NOTE(review): the trailing `None` is a second, alternative path; it presumably
        # makes traverse_obj fall back to page_props itself when no feed entry matches —
        # confirm against traverse_obj semantics
        video_data = traverse_obj(page_props, (
            'spotlightFeed', 'spotlightStories',
            is_requested_story, 'metadata', any), None)

        # Fields read from the nested 'videoMetadata' object
        video_metadata_fields = {
            'title': ('name', {str}),
            'description': ('description', {str}),
            'timestamp': ('uploadDateMs', {ms_to_seconds}),
            'view_count': ('viewCount', {int_or_none}, {discard_minus_one}),
            'repost_count': ('shareCount', {int_or_none}),
            'url': ('contentUrl', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
            'duration': ('durationMs', {ms_to_seconds}),
            'thumbnail': ('thumbnailUrl', {url_or_none}),
            'uploader': ('creator', 'personCreator', 'username', {str}),
            'uploader_url': ('creator', 'personCreator', 'url', {url_or_none}),
        }
        # Fields read from the top level of the metadata; unpacked last, so any key
        # produced here replaces the same key produced from 'videoMetadata'
        top_level_fields = {
            'description': ('description', {str}),
            'tags': ('hashtags', ..., {str}),
            'view_count': ('engagementStats', 'viewCount', {int_or_none}, {discard_minus_one}),
            'repost_count': ('engagementStats', 'shareCount', {int_or_none}),
        }

        return {
            'id': video_id,
            'ext': 'mp4',
            **traverse_obj(video_data, ('videoMetadata', video_metadata_fields)),
            **traverse_obj(video_data, top_level_fields),
        }

View file

@ -472,7 +472,7 @@ class SVTPageIE(SVTBaseIE):
title = self._og_search_title(webpage) title = self._og_search_title(webpage)
urql_state = self._search_json( urql_state = self._search_json(
r'window\.svt\.nyh\.urqlState\s*=', webpage, 'json data', display_id) r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {} data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}

View file

@ -8,7 +8,7 @@ from ..utils import (
class Tele13IE(InfoExtractor): class Tele13IE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?t13\.cl/videos(?:/[^/]+)+/(?P<id>[\w-]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda', 'url': 'http://www.t13.cl/videos/actualidad/el-circulo-de-hierro-de-michelle-bachelet-en-su-regreso-a-la-moneda',

View file

@ -1,33 +1,31 @@
import base64
import datetime as dt
import functools import functools
import itertools import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
class TenPlayIE(InfoExtractor): class TenPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})' _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
_NETRC_MACHINE = '10play' _NETRC_MACHINE = '10play'
_TESTS = [{ _TESTS = [{
'url': 'https://10play.com.au/neighbours/web-extras/season-39/nathan-borg-is-the-first-aussie-actor-with-a-cochlear-implant-to-join-neighbours/tpv210128qupwd', 'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
'info_dict': { 'info_dict': {
'id': '6226844312001', 'id': '6336940246112',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours', 'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours', 'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43', 'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
'duration': 186, 'duration': 74,
'season': 'Season 39', 'season': 'Season 41',
'season_number': 39, 'season_number': 41,
'series': 'Neighbours', 'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg', 'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10', 'uploader': 'Channel 10',
'age_limit': 15, 'age_limit': 15,
'timestamp': 1611810000, 'timestamp': 1694386800,
'upload_date': '20210128', 'upload_date': '20230910',
'uploader_id': '2199827728001', 'uploader_id': '2199827728001',
}, },
'params': { 'params': {
@ -35,21 +33,30 @@ class TenPlayIE(InfoExtractor):
}, },
'skip': 'Only available in Australia', 'skip': 'Only available in Australia',
}, { }, {
'url': 'https://10play.com.au/todd-sampsons-body-hack/episodes/season-4/episode-7/tpv200921kvngh', 'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
'info_dict': { 'info_dict': {
'id': '6192880312001', 'id': '9000000000091177',
'ext': 'mp4', 'ext': 'mp4',
'title': "Todd Sampson's Body Hack - S4 Ep. 2", 'title': 'Neighbours - S42 Ep. 9107',
'description': 'md5:fa278820ad90f08ea187f9458316ac74', 'alt_title': 'Thu 05 Sep',
'description': 'md5:37a1f4271be34b9ee2b533426a5fbaef',
'duration': 1388,
'episode': 'Episode 9107',
'episode_number': 9107,
'season': 'Season 42',
'season_number': 42,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'age_limit': 15, 'age_limit': 15,
'timestamp': 1600770600, 'timestamp': 1725517860,
'upload_date': '20200922', 'upload_date': '20240905',
'uploader': 'Channel 10', 'uploader': 'Channel 10',
'uploader_id': '2199827728001', 'uploader_id': '2199827728001',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Only available in Australia',
}, { }, {
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
'only_matching': True, 'only_matching': True,
@ -66,55 +73,42 @@ class TenPlayIE(InfoExtractor):
'X': 18, 'X': 18,
} }
def _get_bearer_token(self, video_id):
username, password = self._get_login_info()
if username is None or password is None:
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
'X-Network-Ten-Auth': _auth_header,
}, data=urlencode_postdata({
'email': username,
'password': password,
}))
return 'Bearer ' + data['jwt']['accessToken']
def _real_extract(self, url): def _real_extract(self, url):
content_id = self._match_id(url) content_id = self._match_id(url)
data = self._download_json( data = self._download_json(
'https://10play.com.au/api/v1/videos/' + content_id, content_id) 'https://10play.com.au/api/v1/videos/' + content_id, content_id)
headers = {}
if data.get('memberGated') is True: video_data = self._download_json(
_token = self._get_bearer_token(content_id) f'https://vod.ten.com.au/api/videos/bcquery?command=find_videos_by_id&video_id={data["altId"]}',
headers = {'Authorization': _token} content_id, 'Downloading video JSON')
m3u8_url = self._request_webpage(
_video_url = self._download_json( HEADRequest(video_data['items'][0]['HLSURL']),
data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON', content_id, 'Checking stream URL').url
headers=headers).get('source')
m3u8_url = self._request_webpage(HEADRequest(
_video_url), content_id).url
if '10play-not-in-oz' in m3u8_url: if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU']) self.raise_geo_restricted(countries=['AU'])
# Attempt to get a higher quality stream
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4') formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
return { return {
'id': content_id,
'formats': formats, 'formats': formats,
'subtitles': {'en': [{'url': data.get('captionUrl')}]} if data.get('captionUrl') else None, 'subtitles': {'en': [{'url': data['captionUrl']}]} if url_or_none(data.get('captionUrl')) else None,
'id': data.get('altId') or content_id,
'duration': data.get('duration'),
'title': data.get('subtitle'),
'alt_title': data.get('title'),
'description': data.get('description'),
'age_limit': self._AUS_AGES.get(data.get('classification')),
'series': data.get('tvShow'),
'season_number': int_or_none(data.get('season')),
'episode_number': int_or_none(data.get('episode')),
'timestamp': data.get('published'),
'thumbnail': data.get('imageUrl'),
'uploader': 'Channel 10', 'uploader': 'Channel 10',
'uploader_id': '2199827728001', 'uploader_id': '2199827728001',
**traverse_obj(data, {
'id': ('altId', {str}),
'duration': ('duration', {int_or_none}),
'title': ('subtitle', {str}),
'alt_title': ('title', {str}),
'description': ('description', {str}),
'age_limit': ('classification', {self._AUS_AGES.get}),
'series': ('tvShow', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'timestamp': ('published', {int_or_none}),
'thumbnail': ('imageUrl', {url_or_none}),
}),
} }

View file

@ -23,7 +23,6 @@ from ..utils import (
mimetype2ext, mimetype2ext,
parse_qs, parse_qs,
qualities, qualities,
remove_start,
srt_subtitles_timecode, srt_subtitles_timecode,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
@ -254,7 +253,16 @@ class TikTokBaseIE(InfoExtractor):
def _get_subtitles(self, aweme_detail, aweme_id, user_name): def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info # TODO: Extract text positioning info
EXT_MAP = { # From lowest to highest preference
'creator_caption': 'json',
'srt': 'srt',
'webvtt': 'vtt',
}
preference = qualities(tuple(EXT_MAP.values()))
subtitles = {} subtitles = {}
# aweme/detail endpoint subs # aweme/detail endpoint subs
captions_info = traverse_obj( captions_info = traverse_obj(
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict) aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
@ -278,8 +286,8 @@ class TikTokBaseIE(InfoExtractor):
if not caption.get('url'): if not caption.get('url'):
continue continue
subtitles.setdefault(caption.get('lang') or 'en', []).append({ subtitles.setdefault(caption.get('lang') or 'en', []).append({
'ext': remove_start(caption.get('caption_format'), 'web'),
'url': caption['url'], 'url': caption['url'],
'ext': EXT_MAP.get(caption.get('Format')),
}) })
# webpage subs # webpage subs
if not subtitles: if not subtitles:
@ -288,9 +296,14 @@ class TikTokBaseIE(InfoExtractor):
self._create_url(user_name, aweme_id), aweme_id, fatal=False) self._create_url(user_name, aweme_id), aweme_id, fatal=False)
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'], 'url': caption['Url'],
'ext': EXT_MAP.get(caption.get('Format')),
}) })
# Deprioritize creator_caption json since it can't be embedded or used by media players
for lang, subs_list in subtitles.items():
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
return subtitles return subtitles
def _parse_url_key(self, url_key): def _parse_url_key(self, url_key):
@ -529,16 +542,12 @@ class TikTokBaseIE(InfoExtractor):
**COMMON_FORMAT_INFO, **COMMON_FORMAT_INFO,
'format_id': 'download', 'format_id': 'download',
'url': self._proto_relative_url(download_url), 'url': self._proto_relative_url(download_url),
'format_note': 'watermarked',
'preference': -2,
}) })
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
f.update({
'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
'preference': f.get('preference') or -2,
})
# Is it a slideshow with only audio for download? # Is it a slideshow with only audio for download?
if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})): if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
audio_url = aweme_detail['music']['playUrl'] audio_url = aweme_detail['music']['playUrl']
@ -552,7 +561,8 @@ class TikTokBaseIE(InfoExtractor):
'vcodec': 'none', 'vcodec': 'none',
}) })
return formats # Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False): def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), { author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {

View file

@ -1,60 +1,29 @@
import functools import functools
import re import re
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
class TVAIE(InfoExtractor): class TVAIE(InfoExtractor):
_VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)' IE_NAME = 'tvaplus'
IE_DESC = 'TVA+'
_VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P<id>\d+)(?:$|[#?])'
_TESTS = [{ _TESTS = [{
'url': 'https://videos.tva.ca/details/_5596811470001', 'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619',
'info_dict': {
'id': '5596811470001',
'ext': 'mp4',
'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
'uploader_id': '5481942443001',
'upload_date': '20171003',
'timestamp': 1507064617,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'https://video.tva.ca/details/_5596811470001',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'id': video_id,
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
'ie_key': 'BrightcoveNew',
}
class QubIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
'md5': '949490fd0e7aee11d0543777611fbd53', 'md5': '949490fd0e7aee11d0543777611fbd53',
'info_dict': { 'info_dict': {
'id': '6084352463001', 'id': '6084352463001',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ép 01. Mon dernier jour', 'title': 'Mon dernier jour',
'uploader_id': '5481942443001', 'uploader_id': '5481942443001',
'upload_date': '20190907', 'upload_date': '20190907',
'timestamp': 1567899756, 'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145', 'description': 'md5:9c0d7fbb90939420c651fd977df90145',
'thumbnail': r're:https://.+\.jpg', 'thumbnail': r're:https://.+\.jpg',
'episode': 'Ép 01. Mon dernier jour', 'episode': 'Mon dernier jour',
'episode_number': 1, 'episode_number': 1,
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'], 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
'duration': 2625.963, 'duration': 2625.963,
@ -64,23 +33,36 @@ class QubIE(InfoExtractor):
'channel': 'TVA', 'channel': 'TVA',
}, },
}, { }, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943', 'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190',
'only_matching': True, 'info_dict': {
'id': '6354448043112',
'ext': 'mp4',
'title': 'Le Baiser du barbu',
'uploader_id': '5481942443001',
'upload_date': '20240606',
'timestamp': 1717694023,
'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57',
'thumbnail': r're:https://.+\.jpg',
'episode': 'Le Baiser du barbu',
'tags': ['fullepisode', 'films'],
'duration': 6053.504,
'series': 'Le Baiser du barbu',
'channel': 'TVA',
},
}] }]
# reference_id also works with old account_id(5481942443001) _BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}'
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
def _real_extract(self, url): def _real_extract(self, url):
entity_id = self._match_id(url) entity_id = self._match_id(url)
webpage = self._download_webpage(url, entity_id) webpage = self._download_webpage(url, entity_id)
entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData'] entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity']
video_id = entity['videoId'] video_id = entity['videoId']
episode = strip_or_none(entity.get('name')) episode = strip_or_none(entity.get('name'))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': f'https://videos.tva.ca/details/_{video_id}', 'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}),
'ie_key': TVAIE.ie_key(), 'ie_key': BrightcoveNewIE.ie_key(),
'id': video_id, 'id': video_id,
'title': episode, 'title': episode,
'episode': episode, 'episode': episode,

View file

@ -10,7 +10,7 @@ from ..utils import (
class TVerIE(InfoExtractor): class TVerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'skip': 'videos are only available for 7 days', 'skip': 'videos are only available for 7 days',
'url': 'https://tver.jp/episodes/ep83nf3w4p', 'url': 'https://tver.jp/episodes/ep83nf3w4p',
@ -23,6 +23,20 @@ class TVerIE(InfoExtractor):
'channel': 'テレビ朝日', 'channel': 'テレビ朝日',
}, },
'add_ie': ['BrightcoveNew'], 'add_ie': ['BrightcoveNew'],
}, {
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
'info_dict': {
'id': '6359578055112',
'ext': 'mp4',
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
'timestamp': 1722279928,
'upload_date': '20240729',
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
'uploader_id': '4774017240001',
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
'duration': 670.571,
},
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://tver.jp/corner/f0103888', 'url': 'https://tver.jp/corner/f0103888',
'only_matching': True, 'only_matching': True,
@ -47,7 +61,15 @@ class TVerIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id, video_type = self._match_valid_url(url).group('id', 'type') video_id, video_type = self._match_valid_url(url).group('id', 'type')
if video_type not in {'series', 'episodes'}:
if video_type == 'olympic/paris2024/video':
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
return self.url_result(smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
{'geo_countries': ['JP']}), 'BrightcoveNew')
elif video_type not in {'series', 'episodes'}:
webpage = self._download_webpage(url, video_id, note='Resolving to new URL') webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
video_id = self._match_id(self._search_regex( video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),

View file

@ -8,7 +8,7 @@ from ..utils import (
class TVN24IE(InfoExtractor): class TVN24IE(InfoExtractor):
_WORKING = False _WORKING = False
_VALID_URL = r'https?://(?:(?:[^/]+)\.)?tvn24(?:bis)?\.pl/(?:[^/]+/)*(?P<id>[^/]+)' _VALID_URL = r'https?://(?:(?!eurosport)[^/]+\.)?tvn24(?:bis)?\.pl/(?:[^/?#]+/)*(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html', 'url': 'http://www.tvn24.pl/wiadomosci-z-kraju,3/oredzie-artura-andrusa,702428.html',
'md5': 'fbdec753d7bc29d96036808275f2130c', 'md5': 'fbdec753d7bc29d96036808275f2130c',

View file

@ -270,7 +270,7 @@ class TwitCastingLiveIE(InfoExtractor):
class TwitCastingUserIE(InfoExtractor): class TwitCastingUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(:?show|archive)/?(?:[#?]|$)' _VALID_URL = r'https?://(?:[^/?#]+\.)?twitcasting\.tv/(?P<id>[^/?#]+)/(?:show|archive)/?(?:[#?]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://twitcasting.tv/natsuiromatsuri/archive/', 'url': 'https://twitcasting.tv/natsuiromatsuri/archive/',
'info_dict': { 'info_dict': {

View file

@ -1764,7 +1764,7 @@ class TwitterSpacesIE(TwitterBaseIE):
'release_timestamp': 1659904215, 'release_timestamp': 1659904215,
'release_date': '20220807', 'release_date': '20220807',
}, },
'params': {'skip_download': 'm3u8'}, 'skip': 'No longer available',
}, { }, {
# post_live/TimedOut but downloadable # post_live/TimedOut but downloadable
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl', 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
@ -1780,6 +1780,8 @@ class TwitterSpacesIE(TwitterBaseIE):
'upload_date': '20230413', 'upload_date': '20230413',
'release_timestamp': 1681839000, 'release_timestamp': 1681839000,
'release_date': '20230418', 'release_date': '20230418',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -1790,11 +1792,31 @@ class TwitterSpacesIE(TwitterBaseIE):
'ext': 'm4a', 'ext': 'm4a',
'title': '', 'title': '',
'description': 'Twitter Space participated by nobody yet', 'description': 'Twitter Space participated by nobody yet',
'uploader': '息根とめる🔪Twitchで復活', 'uploader': '息根とめる',
'uploader_id': 'tomeru_ikinone', 'uploader_id': 'tomeru_ikinone',
'live_status': 'was_live', 'live_status': 'was_live',
'timestamp': 1685617198, 'timestamp': 1685617198,
'upload_date': '20230601', 'upload_date': '20230601',
'protocol': 'm3u8', # ffmpeg is forced
'container': 'm4a_dash', # audio-only format fixup is applied
},
'params': {'skip_download': 'm3u8'},
}, {
# Video Space
'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
'info_dict': {
'id': '1DXGydznBYWKM',
'ext': 'mp4',
'title': 'America and Israels “special relationship”',
'description': 'Twitter Space participated by nobody yet',
'uploader': 'Candace Owens',
'uploader_id': 'RealCandaceO',
'live_status': 'was_live',
'timestamp': 1723931351,
'upload_date': '20240817',
'release_timestamp': 1723932000,
'release_date': '20240817',
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}] }]
@ -1854,13 +1876,17 @@ class TwitterSpacesIE(TwitterBaseIE):
source = traverse_obj( source = traverse_obj(
self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']), self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False) ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
formats = self._extract_m3u8_formats( # XXX: Some Spaces need ffmpeg as downloader is_audio_space = source and 'audio-space' in source
source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live, formats = self._extract_m3u8_formats(
headers=headers, fatal=False) if source else [] source, metadata['media_key'], 'm4a' if is_audio_space else 'mp4',
for fmt in formats: # XXX: Some audio-only Spaces need ffmpeg as downloader
fmt.update({'vcodec': 'none', 'acodec': 'aac'}) entry_protocol='m3u8' if is_audio_space else 'm3u8_native',
if not is_live: live=is_live, headers=headers, fatal=False) if source else []
fmt['container'] = 'm4a_dash' if is_audio_space:
for fmt in formats:
fmt.update({'vcodec': 'none', 'acodec': 'aac'})
if not is_live:
fmt['container'] = 'm4a_dash'
participants = ', '.join(traverse_obj( participants = ', '.join(traverse_obj(
space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet' space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'

View file

@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'amazon\.(?:\w{2}\.)?\w+/gp/video', r'amazon\.(?:\w{2}\.)?\w+/gp/video',
r'music\.amazon\.(?:\w{2}\.)?\w+', r'music\.amazon\.(?:\w{2}\.)?\w+',
r'(?:watch|front)\.njpwworld\.com', r'(?:watch|front)\.njpwworld\.com',
r'qub\.ca/vrai',
) )
_TESTS = [{ _TESTS = [{
@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor):
}, { }, {
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs', 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

148
yt_dlp/extractor/vidflex.py Normal file
View file

@ -0,0 +1,148 @@
import base64
import json
from .common import InfoExtractor
from ..utils import (
int_or_none,
join_nonempty,
mimetype2ext,
url_or_none,
)
from ..utils.traversal import traverse_obj
class VidflexIE(InfoExtractor):
    # Host patterns accepted by this extractor; they are OR-ed together
    # into the host part of _VALID_URL below.
    _DOMAINS_RE = [
        r'[^.]+\.vidflex\.tv',
        r'(?:www\.)?acactv\.ca',
        r'(?:www\.)?albertalacrossetv\.com',
        r'(?:www\.)?cjfltv\.com',
        r'(?:www\.)?figureitoutbaseball\.com',
        r'(?:www\.)?ocaalive\.com',
        r'(?:www\.)?pegasussports\.tv',
        r'(?:www\.)?praxisseries\.ca',
        r'(?:www\.)?silenticetv\.com',
        r'(?:www\.)?tuffhedemantv\.com',
        r'(?:www\.)?watchfuntv\.com',
        r'live\.ofsaa\.on\.ca',
        r'tv\.procoro\.ca',
        r'tv\.realcastmedia\.net',
        r'tv\.fringetheatre\.ca',
        r'video\.haisla\.ca',
        r'video\.hockeycanada\.ca',
        r'video\.huuayaht\.org',
        r'video\.turningpointensemble\.ca',
        r'videos\.livingworks\.net',
        r'videos\.telusworldofscienceedmonton\.ca',
        r'watch\.binghamtonbulldogs\.com',
        r'watch\.rekindle\.tv',
        r'watch\.wpca\.com',
    ]
    # Path shape: /<ll>[-<cc>]/c/<slug>.<numeric id>; the trailing digits are the video id.
    _VALID_URL = rf'https?://(?:{"|".join(_DOMAINS_RE)})/[a-z]{{2}}(?:-[a-z]{{2}})?/c/[\w-]+\.(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.hockeycanada.ca/en/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'only_matching': True,
    }, {
        # m3u8 + https
        'url': 'https://video.hockeycanada.ca/en-us/c/nwt-micd-up-with-jamie-lee-rattray.107486',
        'info_dict': {
            'id': '107486',
            'title': 'NWT: Micd up with Jamie Lee Rattray',
            'ext': 'mp4',
            'duration': 115,
            'timestamp': 1634310409,
            'upload_date': '20211015',
            'tags': ['English', '2021', "National Women's Team"],
            'description': 'md5:efb1cf6165b48cc3f5555c4262dd5b23',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://video.hockeycanada.ca/en/c/mwc-remembering-the-wild-ride-in-riga.112307',
        'info_dict': {
            'id': '112307',
            'title': 'MWC: Remembering the wild ride in Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235607,
            'upload_date': '20240520',
            'tags': ['English', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': r're:.+Canadas National Mens Team.+',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        # the same video in French
        'url': 'https://video.hockeycanada.ca/fr/c/cmm-retour-sur-un-parcours-endiable-a-riga.112304',
        'info_dict': {
            'id': '112304',
            'title': 'CMM : Retour sur un parcours endiablé à Riga',
            'ext': 'mp4',
            'duration': 322,
            'timestamp': 1716235545,
            'upload_date': '20240520',
            'tags': ['French', '2024', "National Men's Team", 'IIHF World Championship', 'Fan'],
            'description': 'md5:cf825222882a3dab1cd62cffcf3b4d1f',
            'thumbnail': r're:^https?://wpmedia01-a\.akamaihd\.net/en/asset/public/image/.+',
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://myfbcgreenville.vidflex.tv/en/c/may-12th-2024.658',
        'only_matching': True,
    }, {
        'url': 'https://www.figureitoutbaseball.com/en/c/fiob-podcast-14-dan-bertolini-ncaa-d1-head-coach-recorded-11-29-2018.1367',
        'only_matching': True,
    }, {
        'url': 'https://videos.telusworldofscienceedmonton.ca/en/c/the-aurora-project-timelapse-4.577',
        'only_matching': True,
    }, {
        'url': 'https://www.tuffhedemantv.com/en/c/2022-tuff-hedeman-tour-hobbs-nm-january-22.227',
        'only_matching': True,
    }, {
        'url': 'https://www.albertalacrossetv.com/en/c/up-floor-ground-balls-one-more.3449',
        'only_matching': True,
    }, {
        'url': 'https://www.silenticetv.com/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }, {
        'url': 'https://jphl.vidflex.tv/en/c/jp-unlocked-day-in-the-life-of-langley-ha-15u.5197',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video page.

        The page embeds a ``content_api`` URL; the JSON fetched from it has a
        base64-encoded ``config`` field that decodes to the JSON player
        configuration the formats are read from. Remaining metadata comes
        from the page's JSON-LD.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Capture the quoted URL following `content_api:`; the back-reference
        # makes the match stop at the same quote character that opened it.
        content_api_url = self._html_search_regex(
            r'content_api:\s*(["\'])(?P<url>https?://(?:(?!\1).)+)\1',
            webpage, 'content api url', group='url')

        api_response = self._download_json(content_api_url, video_id)
        # config: base64 -> bytes -> str -> parsed JSON, kept only if it is a dict
        media_config = traverse_obj(
            api_response,
            ('config', {base64.b64decode}, {bytes.decode}, {json.loads}, {dict}))

        formats = list(self._yield_formats(media_config, video_id))

        # Strip the CDATA comment markers from the page before the JSON-LD search
        cleaned_webpage = webpage.replace('/*<![CDATA[*/', '').replace('/*]]>*/', '')
        json_ld_info = self._search_json_ld(cleaned_webpage, video_id)

        # JSON-LD fields are unpacked last, so they win on any key collision
        return {
            'id': video_id,
            'formats': formats,
            **json_ld_info,
        }

    def _yield_formats(self, media_config, video_id):
        """Yield format dicts for every ``media.source`` entry with a valid ``src`` URL."""
        sources = traverse_obj(
            media_config, ('media', 'source', lambda _, v: url_or_none(v['src'])))

        for source in sources:
            src_url = source['src']
            ext = mimetype2ext(source.get('type'))

            if ext == 'm3u8':
                # HLS manifest: expand into its variants, non-fatal on failure
                yield from self._extract_m3u8_formats(src_url, video_id, fatal=False, m3u8_id='hls')
                continue

            if ext != 'mp4':
                # Any other type is passed through as a bare format entry
                yield {
                    'url': src_url,
                    'ext': ext,
                }
                continue

            # Progressive MP4: the bitrate is taken from a `_<digits>k.mp4` suffix in the URL, if present
            bitrate = self._search_regex(r'_(\d+)k\.mp4', src_url, 'bitrate', default=None)
            yield {
                'format_id': join_nonempty('http', bitrate),
                'url': src_url,
                'ext': 'mp4',
                'tbr': int_or_none(bitrate),
            }

Some files were not shown because too many files have changed in this diff Show more