diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index cd7ead796..4bed5af6a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -164,7 +164,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: build-${{ github.job }}
+ name: build-bin-${{ github.job }}
path: |
yt-dlp
yt-dlp.tar.gz
@@ -227,7 +227,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: build-linux_${{ matrix.architecture }}
+ name: build-bin-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0
@@ -271,7 +271,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: build-${{ github.job }}
+ name: build-bin-${{ github.job }}
path: |
dist/yt-dlp_macos
dist/yt-dlp_macos.zip
@@ -324,7 +324,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: build-${{ github.job }}
+ name: build-bin-${{ github.job }}
path: |
dist/yt-dlp_macos_legacy
compression-level: 0
@@ -373,7 +373,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: build-${{ github.job }}
+ name: build-bin-${{ github.job }}
path: |
dist/yt-dlp.exe
dist/yt-dlp_min.exe
@@ -421,7 +421,7 @@ jobs:
- name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: build-${{ github.job }}
+ name: build-bin-${{ github.job }}
path: |
dist/yt-dlp_x86.exe
compression-level: 0
@@ -441,7 +441,7 @@ jobs:
- uses: actions/download-artifact@v4
with:
path: artifact
- pattern: build-*
+ pattern: build-bin-*
merge-multiple: true
- name: Make SHA2-SUMS files
@@ -484,3 +484,4 @@ jobs:
_update_spec
SHA*SUMS*
compression-level: 0
+ overwrite: true
diff --git a/README.md b/README.md
index 2fcb09917..99235220a 100644
--- a/README.md
+++ b/README.md
@@ -167,8 +167,8 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
-* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress`
-* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options
+* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
+* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
# INSTALLATION
@@ -218,7 +218,7 @@ Example usage:
yt-dlp --update-to nightly
# To install nightly with pip:
-python -m pip install -U --pre yt-dlp
+python -m pip install -U --pre yt-dlp[default]
```
@@ -1310,8 +1310,11 @@ The available fields are:
- `description` (string): The description of the video
- `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader
+ - `uploader_id` (string): Nickname or id of the video uploader
+ - `uploader_url` (string): URL to the video uploader's profile
- `license` (string): License name the video is licensed under
- - `creator` (string): The creator of the video
+ - `creators` (list): The creators of the video
+ - `creator` (string): The creators of the video; comma-separated
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@@ -1319,9 +1322,9 @@ The available fields are:
- `release_year` (numeric): Year (YYYY) when the video or album was released
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- - `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
+ - `channel_url` (string): URL of the channel
- `channel_follower_count` (numeric): Number of followers of the channel
- `channel_is_verified` (boolean): Whether the channel is verified on the platform
- `location` (string): Physical location where the video was filmed
@@ -1361,7 +1364,10 @@ The available fields are:
- `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
-
+ - `categories` (list): List of categories the video belongs to
+ - `tags` (list): List of tags assigned to the video
+ - `cast` (list): List of cast members
+
All the fields in [Filtering Formats](#filtering-formats) can also be used
Available for the video that belongs to some logical chapter or section:
@@ -1373,6 +1379,7 @@ Available for the video that belongs to some logical chapter or section:
Available for the video that is an episode of some series or programme:
- `series` (string): Title of the series or programme the video episode belongs to
+ - `series_id` (string): Id of the series or programme the video episode belongs to
- `season` (string): Title of the season the video episode belongs to
- `season_number` (numeric): Number of the season the video episode belongs to
- `season_id` (string): Id of the season the video episode belongs to
@@ -1385,11 +1392,16 @@ Available for the media that is a track or a part of a music album:
- `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track
- - `artist` (string): Artist(s) of the track
- - `genre` (string): Genre(s) of the track
+ - `artists` (list): Artist(s) of the track
+ - `artist` (string): Artist(s) of the track; comma-separated
+ - `genres` (list): Genre(s) of the track
+ - `genre` (string): Genre(s) of the track; comma-separated
+ - `composers` (list): Composer(s) of the piece
+ - `composer` (string): Composer(s) of the piece; comma-separated
- `album` (string): Title of the album the track belongs to
- `album_type` (string): Type of the album
- - `album_artist` (string): List of all artists appeared on the album
+ - `album_artists` (list): All artists that appeared on the album
+ - `album_artist` (string): All artists that appeared on the album; comma-separated
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@@ -1767,10 +1779,11 @@ Metadata fields | From
`description`, `synopsis` | `description`
`purl`, `comment` | `webpage_url`
`track` | `track_number`
-`artist` | `artist`, `creator`, `uploader` or `uploader_id`
-`genre` | `genre`
+`artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
+`composer` | `composer` or `composers`
+`genre` | `genre` or `genres`
`album` | `album`
-`album_artist` | `album_artist`
+`album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number`
`show` | `series`
`season_number` | `season_number`
diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py
index 715e5b044..889d9abeb 100755
--- a/devscripts/install_deps.py
+++ b/devscripts/install_deps.py
@@ -19,7 +19,7 @@ def parse_args():
parser.add_argument(
'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
parser.add_argument(
- '-e', '--exclude', metavar='REQUIREMENT', action='append', help='Exclude a required dependency')
+ '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
parser.add_argument(
'-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
parser.add_argument(
@@ -33,21 +33,28 @@ def parse_args():
def main():
args = parse_args()
- toml_data = parse_toml(read_file(args.input))
- deps = toml_data['project']['dependencies']
- targets = deps.copy() if not args.only_optional else []
-
- for exclude in args.exclude or []:
- for dep in deps:
- simplified_dep = re.match(r'[\w-]+', dep)[0]
- if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep):
- targets.remove(dep)
-
- optional_deps = toml_data['project']['optional-dependencies']
- for include in args.include or []:
- group = optional_deps.get(include)
- if group:
- targets.extend(group)
+ project_table = parse_toml(read_file(args.input))['project']
+ optional_groups = project_table['optional-dependencies']
+ excludes = args.exclude or []
+
+ deps = []
+ if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
+ deps.extend(project_table['dependencies'])
+ if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
+ deps.extend(optional_groups['default'])
+
+ def name(dependency):
+ return re.match(r'[\w-]+', dependency)[0].lower()
+
+ target_map = {name(dep): dep for dep in deps}
+
+ for include in filter(None, map(optional_groups.get, args.include or [])):
+ target_map.update(zip(map(name, include), include))
+
+ for exclude in map(name, excludes):
+ target_map.pop(exclude, None)
+
+ targets = list(target_map.values())
if args.print:
for target in targets:
diff --git a/pyproject.toml b/pyproject.toml
index 0c9c5fc01..dda43288f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,7 @@ dependencies = [
]
[project.optional-dependencies]
+default = []
secretstorage = [
"cffi",
"secretstorage",
diff --git a/test/helper.py b/test/helper.py
index 4aca47025..7760fd8d7 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id')
+ # Remove deprecated fields
+ for old in YoutubeDL._deprecated_multivalue_fields.keys():
+ test_info_dict.pop(old, None)
+
# release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year')
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 0087cbc94..6be47af97 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True})
for v in videos:
- ydl.process_ie_result(v, download=True)
+ ydl.process_ie_result(v.copy(), download=True)
return [v['id'] for v in ydl.downloaded_info_dicts]
res = get_videos()
diff --git a/test/test_websockets.py b/test/test_websockets.py
index 91bac3442..13b3a1e76 100644
--- a/test/test_websockets.py
+++ b/test/test_websockets.py
@@ -192,8 +192,8 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('params,extensions', [
- ({'timeout': 0.00001}, {}),
- ({}, {'timeout': 0.00001}),
+ ({'timeout': sys.float_info.min}, {}),
+ ({}, {'timeout': sys.float_info.min}),
])
def test_timeout(self, handler, params, extensions):
with handler(**params) as rh:
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index e7d654d0f..ef66306b1 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -580,6 +580,13 @@ class YoutubeDL:
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
}
+ _deprecated_multivalue_fields = {
+ 'album_artist': 'album_artists',
+ 'artist': 'artists',
+ 'composer': 'composers',
+ 'creator': 'creators',
+ 'genre': 'genres',
+ }
_format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@@ -683,7 +690,6 @@ class YoutubeDL:
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
self.params['http_headers'].pop('Cookie', None)
- self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header()
@@ -957,6 +963,7 @@ class YoutubeDL:
def close(self):
self.save_cookies()
self._request_director.close()
+ del self._request_director
def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears.
@@ -2640,6 +2647,14 @@ class YoutubeDL:
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ for old_key, new_key in self._deprecated_multivalue_fields.items():
+ if new_key in info_dict and old_key in info_dict:
+ self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
+ elif old_value := info_dict.get(old_key):
+ info_dict[new_key] = old_value.split(', ')
+ elif new_value := info_dict.get(new_key):
+ info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
+
def _raise_pending_errors(self, info):
err = info.pop('__pending_error', None)
if err:
@@ -3483,7 +3498,8 @@ class YoutubeDL:
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP)
- ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
+ ffmpeg_fixup(downloader == 'dashsegments'
+ and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
@@ -4144,6 +4160,10 @@ class YoutubeDL:
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director
+ @functools.cached_property
+ def _request_director(self):
+ return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
+
def encode(self, s):
if isinstance(s, bytes):
return s # Already encoded
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 57a487157..4380b888d 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -14,7 +14,7 @@ import os
import re
import traceback
-from .compat import compat_shlex_quote
+from .compat import compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes
@@ -984,7 +984,28 @@ def _real_main(argv=None):
if pre_process:
return ydl._download_retcode
- ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+ args = sys.argv[1:] if argv is None else argv
+ ydl.warn_if_short_id(args)
+
+ # Show a useful error message and wait for keypress if not launched from shell on Windows
+ if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
+ import ctypes.wintypes
+ import msvcrt
+
+ kernel32 = ctypes.WinDLL('Kernel32')
+
+ buffer = (1 * ctypes.wintypes.DWORD)()
+ attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
+ # If we only have a single process attached, then the executable was double clicked
+ # When using `pyinstaller` with `--onefile`, two processes get attached
+ is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
+ if attached_processes == 1 or is_onefile and attached_processes == 2:
+ print(parser._generate_error_message(
+ 'Do not double-click the executable, instead call it from a command line.\n'
+ 'Please read the README for further information on how to use yt-dlp: '
+ 'https://github.com/yt-dlp/yt-dlp#readme'))
+ msvcrt.getch()
+ _exit(2)
parser.error(
'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.')
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 4cfa5b442..026d85fbb 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -320,7 +320,6 @@ from .cbs import (
CBSIE,
ParamountPressExpressIE,
)
-from .cbsinteractive import CBSInteractiveIE
from .cbsnews import (
CBSNewsEmbedIE,
CBSNewsIE,
@@ -348,10 +347,6 @@ from .cgtn import CGTNIE
from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE
-from .chingari import (
- ChingariIE,
- ChingariUserIE,
-)
from .chzzk import (
CHZZKLiveIE,
CHZZKVideoIE,
@@ -369,7 +364,6 @@ from .ciscolive import (
from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE
from .clipchamp import ClipchampIE
-from .cliphunter import CliphunterIE
from .clippit import ClippitIE
from .cliprs import ClipRsIE
from .closertotruth import CloserToTruthIE
@@ -379,7 +373,6 @@ from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
from .cnbc import (
- CNBCIE,
CNBCVideoIE,
)
from .cnn import (
@@ -445,6 +438,7 @@ from .dailymail import DailyMailIE
from .dailymotion import (
DailymotionIE,
DailymotionPlaylistIE,
+ DailymotionSearchIE,
DailymotionUserIE,
)
from .dailywire import (
@@ -476,7 +470,6 @@ from .dlf import (
)
from .dfb import DFBIE
from .dhm import DHMIE
-from .digg import DiggIE
from .douyutv import (
DouyuShowIE,
DouyuTVIE,
@@ -610,7 +603,6 @@ from .fc2 import (
)
from .fczenit import FczenitIE
from .fifa import FifaIE
-from .filmmodu import FilmmoduIE
from .filmon import (
FilmOnIE,
FilmOnChannelIE,
@@ -676,7 +668,6 @@ from .gab import (
GabIE,
)
from .gaia import GaiaIE
-from .gameinformer import GameInformerIE
from .gamejolt import (
GameJoltIE,
GameJoltUserIE,
@@ -705,7 +696,6 @@ from .gettr import (
GettrStreamingIE,
)
from .giantbomb import GiantBombIE
-from .giga import GigaIE
from .glide import GlideIE
from .globalplayer import (
GlobalPlayerLiveIE,
@@ -896,10 +886,8 @@ from .jtbc import (
from .jwplatform import JWPlatformIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
-from .kanal2 import Kanal2IE
from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
from .kelbyone import KelbyOneIE
from .khanacademy import (
KhanAcademyIE,
@@ -915,13 +903,11 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE
-from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE
from .kth import KTHIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE
-from .kusi import KUSIIE
from .kuwo import (
KuwoIE,
KuwoAlbumIE,
@@ -1003,7 +989,6 @@ from .lnkgo import (
LnkGoIE,
LnkIE,
)
-from .localnews8 import LocalNews8IE
from .lovehomeporn import LoveHomePornIE
from .lrt import (
LRTVODIE,
@@ -1030,7 +1015,6 @@ from .mailru import (
MailRuMusicSearchIE,
)
from .mainstreaming import MainStreamingIE
-from .malltv import MallTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
@@ -1074,7 +1058,6 @@ from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .metacritic import MetacriticIE
from .mgtv import MGTVIE
-from .miaopai import MiaoPaiIE
from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE,
@@ -1092,7 +1075,6 @@ from .minds import (
MindsChannelIE,
MindsGroupIE,
)
-from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .mirrativ import (
MirrativIE,
@@ -1120,7 +1102,6 @@ from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE
from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE
-from .morningstar import MorningstarIE
from .motherless import (
MotherlessIE,
MotherlessGroupIE,
@@ -1365,7 +1346,6 @@ from .nuvid import NuvidIE
from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE
-from .odatv import OdaTVIE
from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE
from .oftv import (
@@ -1477,7 +1457,6 @@ from .platzi import (
PlatziCourseIE,
)
from .playplustv import PlayPlusTVIE
-from .playstuff import PlayStuffIE
from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE
from .playwire import PlaywireIE
@@ -1599,7 +1578,6 @@ from .raywenderlich import (
RayWenderlichIE,
RayWenderlichCourseIE,
)
-from .rbmaradio import RBMARadioIE
from .rbgtum import (
RbgTumIE,
RbgTumCourseIE,
@@ -1631,7 +1609,6 @@ from .redgifs import (
RedGifsUserIE,
)
from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
from .rentv import (
RENTVIE,
RENTVArticleIE,
@@ -1640,6 +1617,7 @@ from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE
+from .ridehome import RideHomeIE
from .rinsefm import (
RinseFMIE,
RinseFMArtistPlaylistIE,
@@ -1738,7 +1716,6 @@ from .safari import (
from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE
from .sapo import SapoIE
-from .savefrom import SaveFromIE
from .sbs import SBSIE
from .sbscokr import (
SBSCoKrIE,
@@ -1758,7 +1735,6 @@ from .scte import (
SCTECourseIE,
)
from .scrolller import ScrolllerIE
-from .seeker import SeekerIE
from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE
@@ -1901,7 +1877,6 @@ from .storyfire import (
)
from .streamable import StreamableIE
from .streamcz import StreamCZIE
-from .streamff import StreamFFIE
from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE
from .stripchat import StripchatIE
@@ -1930,7 +1905,6 @@ from .tbsjp import (
TBSJPProgramIE,
TBSJPPlaylistIE,
)
-from .tdslifeway import TDSLifewayIE
from .teachable import (
TeachableIE,
TeachableCourseIE,
@@ -2500,6 +2474,7 @@ from .zee5 import (
Zee5SeriesIE,
)
from .zeenews import ZeeNewsIE
+from .zenporn import ZenPornIE
from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE
from .zingmp3 import (
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index a7b614ca1..b21742281 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -245,7 +245,6 @@ class ABCIViewIE(InfoExtractor):
'episode_id': 'NC2203H039S00',
'season_number': 2022,
'season': 'Season 2022',
- 'episode_number': None,
'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497,
@@ -271,8 +270,6 @@ class ABCIViewIE(InfoExtractor):
'episode_id': 'RF2004Q043S00',
'season_number': 2021,
'season': 'Season 2021',
- 'episode_number': None,
- 'episode': None,
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705,
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 6453dde97..6742f75d5 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -259,7 +259,7 @@ class AbemaTVIE(AbemaTVBaseIE):
'title': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON2',
'episode': 'ゆるキャン△ SEASON2 全話一挙【無料ビデオ72時間】',
- 'series_number': 2,
+ 'season_number': 2,
'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
},
diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py
index dc5792944..c3b4f432e 100644
--- a/yt_dlp/extractor/acfun.py
+++ b/yt_dlp/extractor/acfun.py
@@ -3,6 +3,7 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
+ str_or_none,
traverse_obj,
parse_codecs,
parse_qs,
@@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '红孩儿之趴趴蛙寻石记 第5话 ',
'duration': 760.0,
'season': '红孩儿之趴趴蛙寻石记',
- 'season_id': 5023171,
+ 'season_id': '5023171',
'season_number': 1, # series has only 1 season
'episode': 'Episode 5',
'episode_number': 5,
@@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '叽歪老表(第二季) 第5话 坚不可摧',
'season': '叽歪老表(第二季)',
'season_number': 2,
- 'season_id': 6065485,
+ 'season_id': '6065485',
'episode': '坚不可摧',
'episode_number': 5,
'upload_date': '20220324',
@@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': json_bangumi_data.get('showTitle'),
'thumbnail': json_bangumi_data.get('image'),
'season': json_bangumi_data.get('bangumiTitle'),
- 'season_id': season_id,
+ 'season_id': str_or_none(season_id),
'season_number': season_number,
'episode': json_bangumi_data.get('title'),
'episode_number': episode_number,
diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py
index 0e1627bfd..a8428ce2e 100644
--- a/yt_dlp/extractor/altcensored.py
+++ b/yt_dlp/extractor/altcensored.py
@@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
'display_id': 'k0srjLSkga8.webm',
'release_date': '20180403',
- 'creator': 'Virginie Vota',
+ 'creators': ['Virginie Vota'],
'release_year': 2018,
'upload_date': '20230318',
'uploader': 'admin@altcensored.com',
@@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
'duration': 926.09,
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int,
- 'categories': ['News & Politics'],
+ 'categories': ['News & Politics'], # FIXME
}
}]
@@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
'title': 'Virginie Vota',
'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
},
- 'playlist_count': 91
+ 'playlist_count': 85,
}, {
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
'info_dict': {
'title': 'yukikaze775',
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
},
- 'playlist_count': 4
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
+ 'info_dict': {
+ 'title': 'Mister Metokur',
+ 'id': 'UCfYbb7nga6-icsFWWgS-kWw',
+ },
+ 'playlist_count': 121,
}]
def _real_extract(self, url):
@@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
page_count = int_or_none(self._html_search_regex(
- r']+href="/channel/\w+/page/(\d+)">(?:\1)',
+ r']+href="/channel/[\w-]+/page/(\d+)">(?:\1)',
webpage, 'page count', default='1'))
def page_func(page_num):
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index 3bb6f2e31..41f3a4ff2 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -31,6 +31,7 @@ from ..utils import (
unified_timestamp,
url_or_none,
urlhandle_detect_ext,
+ variadic,
)
@@ -49,7 +50,7 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19681210',
'timestamp': 1268695290,
'upload_date': '20100315',
- 'creator': 'SRI International',
+ 'creators': ['SRI International'],
'uploader': 'laura@archive.org',
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
@@ -109,7 +110,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Turning',
'ext': 'flac',
'track': 'Turning',
- 'creator': 'Grateful Dead',
+ 'creators': ['Grateful Dead'],
'display_id': 'gd1977-05-08d01t01.flac',
'track_number': 1,
'album': '1977-05-08 - Barton Hall - Cornell University',
@@ -129,7 +130,7 @@ class ArchiveOrgIE(InfoExtractor):
'location': 'Barton Hall - Cornell University',
'duration': 438.68,
'track': 'Deal',
- 'creator': 'Grateful Dead',
+ 'creators': ['Grateful Dead'],
'album': '1977-05-08 - Barton Hall - Cornell University',
'release_date': '19770508',
'display_id': 'gd1977-05-08d01t07.flac',
@@ -167,7 +168,7 @@ class ArchiveOrgIE(InfoExtractor):
'upload_date': '20160610',
'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
'uploader': 'dimitrios@archive.org',
- 'creator': ['British Broadcasting Corporation', 'Time-Life Films'],
+ 'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
'timestamp': 1465594947,
},
'playlist': [
@@ -257,7 +258,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': m['title'],
'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']),
- 'creator': m.get('creator'),
+ 'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
@@ -272,7 +273,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': f.get('title') or f['name'],
'display_id': f['name'],
'description': clean_html(f.get('description')),
- 'creator': f.get('creator'),
+ 'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'duration': parse_duration(f.get('length')),
'track_number': int_or_none(f.get('track')),
'album': f.get('album'),
@@ -300,7 +301,7 @@ class ArchiveOrgIE(InfoExtractor):
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({
- 'url': 'https://archive.org/download/' + identifier + '/' + f['name'],
+ 'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
'format': f.get('format'),
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
diff --git a/yt_dlp/extractor/axs.py b/yt_dlp/extractor/axs.py
index 4b263725f..7e9166771 100644
--- a/yt_dlp/extractor/axs.py
+++ b/yt_dlp/extractor/axs.py
@@ -24,7 +24,8 @@ class AxsIE(InfoExtractor):
'timestamp': 1685729564,
'duration': 1284.216,
'series': 'Rock & Roll Road Trip with Sammy Hagar',
- 'season': 2,
+ 'season': 'Season 2',
+ 'season_number': 2,
'episode': '3',
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
},
@@ -41,7 +42,8 @@ class AxsIE(InfoExtractor):
'timestamp': 1676403615,
'duration': 2570.668,
'series': 'The Big Interview with Dan Rather',
- 'season': 3,
+ 'season': 'Season 3',
+ 'season_number': 3,
'episode': '5',
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
},
@@ -77,7 +79,7 @@ class AxsIE(InfoExtractor):
'title': ('title', {str}),
'description': ('description', {str}),
'series': ('seriestitle', {str}),
- 'season': ('season', {int}),
+ 'season_number': ('season', {int}),
'episode': ('episode', {str}),
'duration': ('duration', {float_or_none}),
'timestamp': ('updated_at', {parse_iso8601}),
diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py
index 52ee68eca..042b3220b 100644
--- a/yt_dlp/extractor/beeg.py
+++ b/yt_dlp/extractor/beeg.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
+ str_or_none,
traverse_obj,
try_get,
unified_timestamp,
@@ -22,7 +23,7 @@ class BeegIE(InfoExtractor):
'age_limit': 18,
'upload_date': '20220131',
'timestamp': 1643656455,
- 'display_id': 2540839,
+ 'display_id': '2540839',
}
}, {
'url': 'https://beeg.com/-0599050563103750?t=4-861',
@@ -36,7 +37,7 @@ class BeegIE(InfoExtractor):
'age_limit': 18,
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
'timestamp': 1643623200,
- 'display_id': 2569965,
+ 'display_id': '2569965',
'upload_date': '20220131',
}
}, {
@@ -78,7 +79,7 @@ class BeegIE(InfoExtractor):
return {
'id': video_id,
- 'display_id': first_fact.get('id'),
+ 'display_id': str_or_none(first_fact.get('id')),
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
'timestamp': unified_timestamp(first_fact.get('fc_created')),
diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py
index 5ae4b917a..677680b42 100644
--- a/yt_dlp/extractor/bellmedia.py
+++ b/yt_dlp/extractor/bellmedia.py
@@ -32,7 +32,7 @@ class BellMediaIE(InfoExtractor):
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
- 'season_id': 73997,
+ 'season_id': '73997',
'season': '2018',
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
'tags': [],
diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py
index 5d0c73ff3..c4621ca82 100644
--- a/yt_dlp/extractor/bfmtv.py
+++ b/yt_dlp/extractor/bfmtv.py
@@ -93,7 +93,6 @@ class BFMTVArticleIE(BFMTVBaseIE):
'id': '6318445464112',
'ext': 'mp4',
'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
- 'description': None,
'uploader_id': '876630703001',
'upload_date': '20230110',
'timestamp': 1673341692,
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index c138bde3a..f4e1c91a8 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -1996,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'title': get_element_by_class(
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
'description': get_element_by_class(
- 'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
+ 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
}, self._search_json_ld(webpage, video_id, default={}))
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py
index 41367c5b9..194bf1f46 100644
--- a/yt_dlp/extractor/bitchute.py
+++ b/yt_dlp/extractor/bitchute.py
@@ -185,7 +185,6 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': {
'id': 'UGlrF9o9b-Q',
'ext': 'mp4',
- 'filesize': None,
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',
diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py
index 5e5155af2..e875957cf 100644
--- a/yt_dlp/extractor/bleacherreport.py
+++ b/yt_dlp/extractor/bleacherreport.py
@@ -4,10 +4,12 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_iso8601,
+ str_or_none,
)
class BleacherReportIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
@@ -16,7 +18,7 @@ class BleacherReportIE(InfoExtractor):
'id': '2496438',
'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
- 'uploader_id': 3992341,
+ 'uploader_id': '3992341',
'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212,
'upload_date': '20150615',
@@ -33,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
'timestamp': 1446839961,
'uploader': 'Sean Fay',
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
- 'uploader_id': 6466954,
+ 'uploader_id': '6466954',
'upload_date': '20151011',
},
'add_ie': ['Youtube'],
@@ -58,7 +60,7 @@ class BleacherReportIE(InfoExtractor):
'id': article_id,
'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'),
- 'uploader_id': article_data.get('authorId'),
+ 'uploader_id': str_or_none(article_data.get('authorId')),
'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')),
@@ -82,6 +84,7 @@ class BleacherReportIE(InfoExtractor):
class BleacherReportCMSIE(AMPIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
_TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py
index d97fbd758..cf830210f 100644
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -76,6 +76,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
class CBSIE(CBSBaseIE):
+ _WORKING = False
_VALID_URL = r'''(?x)
(?:
cbs:|
diff --git a/yt_dlp/extractor/cbsinteractive.py b/yt_dlp/extractor/cbsinteractive.py
deleted file mode 100644
index b09e9823e..000000000
--- a/yt_dlp/extractor/cbsinteractive.py
+++ /dev/null
@@ -1,98 +0,0 @@
-from .cbs import CBSIE
-from ..utils import int_or_none
-
-
-class CBSInteractiveIE(CBSIE): # XXX: Do not subclass from concrete IE
- _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
- _TESTS = [{
- 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
- 'info_dict': {
- 'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
- 'display_id': 'hands-on-with-microsofts-windows-8-1-update',
- 'ext': 'mp4',
- 'title': 'Hands-on with Microsoft Windows 8.1 Update',
- 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
- 'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
- 'uploader': 'Sarah Mitroff',
- 'duration': 70,
- 'timestamp': 1396479627,
- 'upload_date': '20140402',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
- 'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
- 'info_dict': {
- 'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
- 'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
- 'ext': 'mp4',
- 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
- 'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
- 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
- 'uploader': 'Ashley Esqueda',
- 'duration': 1482,
- 'timestamp': 1433289889,
- 'upload_date': '20150603',
- },
- }, {
- 'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
- 'info_dict': {
- 'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
- 'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
- 'ext': 'mp4',
- 'title': 'Video: Keeping Android smartphones and tablets secure',
- 'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
- 'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
- 'uploader': 'Adrian Kingsley-Hughes',
- 'duration': 731,
- 'timestamp': 1449129925,
- 'upload_date': '20151203',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
- 'only_matching': True,
- }]
-
- MPX_ACCOUNTS = {
- 'cnet': 2198311517,
- 'zdnet': 2387448114,
- }
-
- def _real_extract(self, url):
- site, display_id = self._match_valid_url(url).groups()
- webpage = self._download_webpage(url, display_id)
-
- data_json = self._html_search_regex(
- r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
- webpage, 'data json')
- data = self._parse_json(data_json, display_id)
- vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0]
-
- video_id = vdata['mpxRefId']
-
- title = vdata['title']
- author = vdata.get('author')
- if author:
- uploader = '%s %s' % (author['firstName'], author['lastName'])
- uploader_id = author.get('id')
- else:
- uploader = None
- uploader_id = None
-
- info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
- info.update({
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'duration': int_or_none(vdata.get('duration')),
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- })
- return info
diff --git a/yt_dlp/extractor/cbssports.py b/yt_dlp/extractor/cbssports.py
index b5d85af12..b9c82dab6 100644
--- a/yt_dlp/extractor/cbssports.py
+++ b/yt_dlp/extractor/cbssports.py
@@ -8,6 +8,7 @@ from ..utils import (
# class CBSSportsEmbedIE(CBSBaseIE):
class CBSSportsEmbedIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'cbssports:embed'
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
(?:
@@ -75,6 +76,7 @@ class CBSSportsBaseIE(InfoExtractor):
class CBSSportsIE(CBSSportsBaseIE):
+ _WORKING = False
IE_NAME = 'cbssports'
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
_TESTS = [{
@@ -92,6 +94,7 @@ class CBSSportsIE(CBSSportsBaseIE):
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
+ _WORKING = False
IE_NAME = '247sports'
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
_TESTS = [{
diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py
index 466bdfb7c..8552ee511 100644
--- a/yt_dlp/extractor/cctv.py
+++ b/yt_dlp/extractor/cctv.py
@@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ # videoCenterId: "id"
+ 'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
+ 'info_dict': {
+ 'id': '5c846c0518444308ba32c4159df3b3e0',
+ 'ext': 'mp4',
+ 'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
+ 'uploader': 'yangjuan',
+ 'timestamp': 1708554940,
+ 'upload_date': '20240221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
# var ids = ["id"]
'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@@ -128,7 +142,7 @@ class CCTVIE(InfoExtractor):
video_id = self._search_regex(
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
- r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)',
+ r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index 8390160a0..156b6a324 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -51,7 +51,7 @@ class CeskaTelevizeIE(InfoExtractor):
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'only_matching': True,
'info_dict': {
- 'id': 402,
+ 'id': '402',
'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py
index aaafa02d1..5d9d9bcde 100644
--- a/yt_dlp/extractor/cgtn.py
+++ b/yt_dlp/extractor/cgtn.py
@@ -17,6 +17,7 @@ class CGTNIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1615295940,
'upload_date': '20210309',
+ 'categories': ['Video'],
},
'params': {
'skip_download': True
@@ -29,8 +30,8 @@ class CGTNIE(InfoExtractor):
'title': 'China, Indonesia vow to further deepen maritime cooperation',
'thumbnail': r're:^https?://.*\.png$',
'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
- 'author': 'CGTN',
- 'category': 'China',
+ 'creators': ['CGTN'],
+ 'categories': ['China'],
'timestamp': 1622950200,
'upload_date': '20210606',
},
@@ -45,7 +46,12 @@ class CGTNIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
- datetime_str = self._html_search_regex(r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
+ datetime_str = self._html_search_regex(
+ r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
+ category = self._html_search_regex(
+ r'<span class="section">\s*(.+?)\s*</span>', webpage, 'category', fatal=False)
+ author = self._search_regex(
+ r'<div class="news-author-name">\s*(.+?)\s*</div>', webpage, 'author', default=None)
return {
'id': video_id,
@@ -53,9 +59,7 @@ class CGTNIE(InfoExtractor):
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
- 'category': self._html_search_regex(r'<span class="section">\s*(.+?)\s*</span>',
- webpage, 'category', fatal=False),
- 'author': self._html_search_regex(r'<div class="news-author-name">\s*(.+?)\s*</div>',
- webpage, 'author', default=None, fatal=False),
+ 'categories': [category] if category else None,
+ 'creators': [author] if author else None,
'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
}
diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py
deleted file mode 100644
index 48091dd65..000000000
--- a/yt_dlp/extractor/chingari.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import itertools
-import json
-import urllib.parse
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- clean_html,
- int_or_none,
- str_to_int,
- url_or_none,
-)
-
-
-class ChingariBaseIE(InfoExtractor):
- def _get_post(self, id, post_data):
- media_data = post_data['mediaLocation']
- base_url = media_data['base']
- author_data = post_data.get('authorData', {})
- song_data = post_data.get('song', {}) # revist this in future for differentiating b/w 'art' and 'author'
-
- formats = [{
- 'format_id': frmt,
- 'width': str_to_int(frmt[1:]),
- 'url': base_url + frmt_path,
- } for frmt, frmt_path in media_data.get('transcoded', {}).items()]
-
- if media_data.get('path'):
- formats.append({
- 'format_id': 'original',
- 'format_note': 'Direct video.',
- 'url': base_url + '/apipublic' + media_data['path'],
- 'quality': 10,
- })
- timestamp = str_to_int(post_data.get('created_at'))
- if timestamp:
- timestamp = int_or_none(timestamp, 1000)
-
- thumbnail, uploader_url = None, None
- if media_data.get('thumbnail'):
- thumbnail = base_url + media_data.get('thumbnail')
- if author_data.get('username'):
- uploader_url = 'https://chingari.io/' + author_data.get('username')
-
- return {
- 'id': id,
- 'extractor_key': ChingariIE.ie_key(),
- 'extractor': 'Chingari',
- 'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
- 'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
- 'duration': media_data.get('duration'),
- 'thumbnail': url_or_none(thumbnail),
- 'like_count': post_data.get('likeCount'),
- 'view_count': post_data.get('viewsCount'),
- 'comment_count': post_data.get('commentCount'),
- 'repost_count': post_data.get('shareCount'),
- 'timestamp': timestamp,
- 'uploader_id': post_data.get('userId') or author_data.get('_id'),
- 'uploader': author_data.get('name'),
- 'uploader_url': url_or_none(uploader_url),
- 'track': song_data.get('title'),
- 'artist': song_data.get('author'),
- 'formats': formats,
- }
-
-
-class ChingariIE(ChingariBaseIE):
- _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
- _TESTS = [{
- 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
- 'info_dict': {
- 'id': '612f8f4ce1dc57090e8a7beb',
- 'ext': 'mp4',
- 'title': 'Happy birthday Srila Prabhupada',
- 'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
- 'duration': 0,
- 'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
- 'like_count': int,
- 'view_count': int,
- 'comment_count': int,
- 'repost_count': int,
- 'timestamp': 1630506828,
- 'upload_date': '20210901',
- 'uploader_id': '5f0403982c8bd344f4813f8c',
- 'uploader': 'ISKCON,Inc.',
- 'uploader_url': 'https://chingari.io/iskcon,inc',
- 'track': None,
- 'artist': None,
- },
- 'params': {'skip_download': True}
- }]
-
- def _real_extract(self, url):
- id = self._match_id(url)
- post_json = self._download_json(f'https://api.chingari.io/post/post_details/{id}', id)
- if post_json['code'] != 200:
- raise ExtractorError(post_json['message'], expected=True)
- post_data = post_json['data']
- return self._get_post(id, post_data)
-
-
-class ChingariUserIE(ChingariBaseIE):
- _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
- _TESTS = [{
- 'url': 'https://chingari.io/dada1023',
- 'info_dict': {
- 'id': 'dada1023',
- },
- 'params': {'playlistend': 3},
- 'playlist': [{
- 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
- 'info_dict': {
- 'id': '614781f3ade60b3a0bfff42a',
- 'ext': 'mp4',
- 'title': '#chingaribappa ',
- 'description': 'md5:d1df21d84088770468fa63afe3b17857',
- 'duration': 7,
- 'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
- 'like_count': int,
- 'view_count': int,
- 'comment_count': int,
- 'repost_count': int,
- 'timestamp': 1632076275,
- 'upload_date': '20210919',
- 'uploader_id': '5efc4b12cca35c3d1794c2d3',
- 'uploader': 'dada (girish) dhawale',
- 'uploader_url': 'https://chingari.io/dada1023',
- 'track': None,
- 'artist': None
- },
- 'params': {'skip_download': True}
- }, {
- 'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
- 'info_dict': {
- 'id': '6146b132bcbf860959e12cba',
- 'ext': 'mp4',
- 'title': 'Tactor harvesting',
- 'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
- 'duration': 59.3,
- 'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
- 'like_count': int,
- 'view_count': int,
- 'comment_count': int,
- 'repost_count': int,
- 'timestamp': 1632022834,
- 'upload_date': '20210919',
- 'uploader_id': '5efc4b12cca35c3d1794c2d3',
- 'uploader': 'dada (girish) dhawale',
- 'uploader_url': 'https://chingari.io/dada1023',
- 'track': None,
- 'artist': None
- },
- 'params': {'skip_download': True}
- }, {
- 'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
- 'info_dict': {
- 'id': '6145651b74cb030a64c40b82',
- 'ext': 'mp4',
- 'title': '#odiabhajan ',
- 'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
- 'duration': 56.67,
- 'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
- 'like_count': int,
- 'view_count': int,
- 'comment_count': int,
- 'repost_count': int,
- 'timestamp': 1631937819,
- 'upload_date': '20210918',
- 'uploader_id': '5efc4b12cca35c3d1794c2d3',
- 'uploader': 'dada (girish) dhawale',
- 'uploader_url': 'https://chingari.io/dada1023',
- 'track': None,
- 'artist': None
- },
- 'params': {'skip_download': True}
- }],
- }, {
- 'url': 'https://chingari.io/iskcon%2Cinc',
- 'playlist_mincount': 1025,
- 'info_dict': {
- 'id': 'iskcon%2Cinc',
- },
- }]
-
- def _entries(self, id):
- skip = 0
- has_more = True
- for page in itertools.count():
- posts = self._download_json('https://api.chingari.io/users/getPosts', id,
- data=json.dumps({'userId': id, 'ownerId': id, 'skip': skip, 'limit': 20}).encode(),
- headers={'content-type': 'application/json;charset=UTF-8'},
- note='Downloading page %s' % page)
- for post in posts.get('data', []):
- post_data = post['post']
- yield self._get_post(post_data['_id'], post_data)
- skip += 20
- has_more = posts['hasMoreData']
- if not has_more:
- break
-
- def _real_extract(self, url):
- alt_id = self._match_id(url)
- post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id)
- if post_json['code'] != 200:
- raise ExtractorError(post_json['message'], expected=True)
- id = post_json['data']['_id']
- return self.playlist_result(self._entries(id), playlist_id=alt_id)
diff --git a/yt_dlp/extractor/chzzk.py b/yt_dlp/extractor/chzzk.py
index 6894baea5..420fe0514 100644
--- a/yt_dlp/extractor/chzzk.py
+++ b/yt_dlp/extractor/chzzk.py
@@ -2,7 +2,7 @@ import functools
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
+ UserNotLive,
float_or_none,
int_or_none,
parse_iso8601,
@@ -40,7 +40,7 @@ class CHZZKLiveIE(InfoExtractor):
note='Downloading channel info', errnote='Unable to download channel info')['content']
if live_detail.get('status') == 'CLOSE':
- raise ExtractorError('The channel is not currently live', expected=True)
+ raise UserNotLive(video_id=channel_id)
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)
diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py
index 54cab2285..706ec8553 100644
--- a/yt_dlp/extractor/cinemax.py
+++ b/yt_dlp/extractor/cinemax.py
@@ -2,6 +2,7 @@ from .hbo import HBOBaseIE
class CinemaxIE(HBOBaseIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
_TESTS = [{
'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
diff --git a/yt_dlp/extractor/cliphunter.py b/yt_dlp/extractor/cliphunter.py
deleted file mode 100644
index 2b907dc80..000000000
--- a/yt_dlp/extractor/cliphunter.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- url_or_none,
-)
-
-
-class CliphunterIE(InfoExtractor):
- IE_NAME = 'cliphunter'
-
- _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
- (?P<id>[0-9]+)/
- (?P<seo>.+?)(?:$|[#\?])
- '''
- _TESTS = [{
- 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
- 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
- 'info_dict': {
- 'id': '1012420',
- 'ext': 'flv',
- 'title': 'Fun Jynx Maze solo',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
- 'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
- 'info_dict': {
- 'id': '2019449',
- 'ext': 'mp4',
- 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'age_limit': 18,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_title = self._search_regex(
- r'mediaTitle = "([^"]+)"', webpage, 'title')
-
- gexo_files = self._parse_json(
- self._search_regex(
- r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'),
- video_id)
-
- formats = []
- for format_id, f in gexo_files.items():
- video_url = url_or_none(f.get('url'))
- if not video_url:
- continue
- fmt = f.get('fmt')
- height = f.get('h')
- format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'width': int_or_none(f.get('w')),
- 'height': int_or_none(height),
- 'tbr': int_or_none(f.get('br')),
- })
-
- thumbnail = self._search_regex(
- r"var\s+mov_thumb\s*=\s*'([^']+)';",
- webpage, 'thumbnail', fatal=False)
-
- return {
- 'id': video_id,
- 'title': video_title,
- 'formats': formats,
- 'age_limit': self._rta_search(webpage),
- 'thumbnail': thumbnail,
- }
diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py
index 567f77b94..c2add02da 100644
--- a/yt_dlp/extractor/cliprs.py
+++ b/yt_dlp/extractor/cliprs.py
@@ -2,6 +2,7 @@ from .onet import OnetBaseIE
class ClipRsIE(OnetBaseIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
_TEST = {
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py
index e78e26a11..1f9a5f611 100644
--- a/yt_dlp/extractor/closertotruth.py
+++ b/yt_dlp/extractor/closertotruth.py
@@ -4,6 +4,7 @@ from .common import InfoExtractor
class CloserToTruthIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py
index c4c7d66a5..a812c24af 100644
--- a/yt_dlp/extractor/cloudflarestream.py
+++ b/yt_dlp/extractor/cloudflarestream.py
@@ -4,27 +4,25 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor):
+ _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
- _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+ _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:watch\.)?%s/|
- %s
- )
- (?P%s)
- ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
- _EMBED_REGEX = [fr'',
- webpage, 'json data'),
- episode_id)['episodes'][show_id][episode_id]
-
- title = episode['title']
-
- show_title = episode.get('showTitle')
- if show_title:
- title = '%s - %s' % (show_title, title)
-
- formats = [{
- 'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
- 'format_id': compat_str(abr),
- 'abr': abr,
- 'vcodec': 'none',
- } for abr in (96, 128, 192, 256)]
- self._check_formats(formats, episode_id)
-
- description = clean_html(episode.get('longTeaser'))
- thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape'))
- duration = int_or_none(episode.get('duration'))
- timestamp = unified_timestamp(episode.get('publishedAt'))
-
- return {
- 'id': episode_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- }
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index 79d9c8e31..2f50efeda 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -229,7 +229,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
'age_limit': 2,
'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'],
'display_id': 'putri-untuk-pangeran',
- 'tag': 'count:18',
+ 'tags': 'count:18',
},
}, { # No episodes
'url': 'https://www.rctiplus.com/programs/615/inews-pagi',
@@ -239,7 +239,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
'title': 'iNews Pagi',
'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04',
'age_limit': 2,
- 'tag': 'count:11',
+ 'tags': 'count:11',
'display_id': 'inews-pagi',
}
}]
@@ -327,8 +327,8 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'),
expected_type=lambda x: strip_or_none(x) or None),
- 'tag': traverse_obj(series_meta, ('tag', ..., 'name'),
- expected_type=lambda x: strip_or_none(x) or None),
+ 'tags': traverse_obj(series_meta, ('tag', ..., 'name'),
+ expected_type=lambda x: strip_or_none(x) or None),
}
return self.playlist_result(
self._series_entries(series_id, display_id, video_type, metadata), series_id,
diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py
index 9a2e0d985..1a1c6634e 100644
--- a/yt_dlp/extractor/rds.py
+++ b/yt_dlp/extractor/rds.py
@@ -8,6 +8,7 @@ from ..compat import compat_str
class RDSIE(InfoExtractor):
+ _WORKING = False
IE_DESC = 'RDS.ca'
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py
index b59b518b1..4d71133b3 100644
--- a/yt_dlp/extractor/redbee.py
+++ b/yt_dlp/extractor/redbee.py
@@ -134,6 +134,7 @@ class ParliamentLiveUKIE(RedBeeBaseIE):
class RTBFIE(RedBeeBaseIE):
+ _WORKING = False
_VALID_URL = r'''(?x)
https?://(?:www\.)?rtbf\.be/
(?:
diff --git a/yt_dlp/extractor/regiotv.py b/yt_dlp/extractor/regiotv.py
deleted file mode 100644
index edb6ae5bc..000000000
--- a/yt_dlp/extractor/regiotv.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from .common import InfoExtractor
-from ..networking import Request
-from ..utils import xpath_text, xpath_with_ns
-
-
-class RegioTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.regio-tv.de/video/395808.html',
- 'info_dict': {
- 'id': '395808',
- 'ext': 'mp4',
- 'title': 'Wir in Ludwigsburg',
- 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!',
- }
- }, {
- 'url': 'http://www.regio-tv.de/video/395808',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- key = self._search_regex(
- r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key')
- title = self._og_search_title(webpage)
-
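- SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><sToken>{1}</sToken></{0}></soap:Body></soap:Envelope>'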
-
- request = Request(
- 'http://v.telvi.de/',
- SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8'))
- video_data = self._download_xml(request, video_id, 'Downloading video XML')
-
- NS_MAP = {
- 'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
- 'soap': 'http://schemas.xmlsoap.org/soap/envelope/',
- }
-
- video_url = xpath_text(
- video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True)
- thumbnail = xpath_text(
- video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail')
- description = self._og_search_description(
- webpage) or self._html_search_meta('description', webpage)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- }
diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py
index fdde31704..abb537cf3 100644
--- a/yt_dlp/extractor/rentv.py
+++ b/yt_dlp/extractor/rentv.py
@@ -8,6 +8,7 @@ from ..utils import (
class RENTVIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://ren.tv/video/epizod/118577',
@@ -59,6 +60,7 @@ class RENTVIE(InfoExtractor):
class RENTVArticleIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v',
diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py
index 6d032564d..f49262a65 100644
--- a/yt_dlp/extractor/restudy.py
+++ b/yt_dlp/extractor/restudy.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class RestudyIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.restudy.dk/video/play/id/1637',
diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py
index 6919425f3..0a8f13b9f 100644
--- a/yt_dlp/extractor/reuters.py
+++ b/yt_dlp/extractor/reuters.py
@@ -9,6 +9,7 @@ from ..utils import (
class ReutersIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
diff --git a/yt_dlp/extractor/ridehome.py b/yt_dlp/extractor/ridehome.py
new file mode 100644
index 000000000..78f838ac1
--- /dev/null
+++ b/yt_dlp/extractor/ridehome.py
@@ -0,0 +1,96 @@
+from .art19 import Art19IE
+from .common import InfoExtractor
+from ..utils import extract_attributes, get_elements_html_by_class
+from ..utils.traversal import traverse_obj
+
+
+class RideHomeIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ridehome\.info/show/[\w-]+/(?P<id>[\w-]+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://www.ridehome.info/show/techmeme-ride-home/thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs/',
+        'info_dict': {
+            'id': 'thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs',
+        },
+        'playlist_count': 1,
+        'playlist': [{
+            'md5': 'c84ea3cc96950a9ab86fe540f3edc588',
+            'info_dict': {
+                'id': '540e5493-9fe6-4c14-a488-dc508d8794b2',
+                'ext': 'mp3',
+                'title': 'Thu. 12/28 – Will 2024 Be The Year Apple Gets Serious About Gaming On Macs?',
+                'description': 'md5:9dba86ae9b5047a8150eceddeeb629c2',
+                'series': 'Techmeme Ride Home',
+                'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b',
+                'upload_date': '20231228',
+                'timestamp': 1703780995,
+                'modified_date': '20231230',
+                'episode_id': '540e5493-9fe6-4c14-a488-dc508d8794b2',
+                'modified_timestamp': 1703912404,
+                'release_date': '20231228',
+                'release_timestamp': 1703782800,
+                'duration': 1000.1502,
+                'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$',
+            },
+        }],
+    }, {
+        'url': 'https://www.ridehome.info/show/techmeme-ride-home/portfolio-profile-sensel-with-ilyarosenberg/',
+        'info_dict': {
+            'id': 'portfolio-profile-sensel-with-ilyarosenberg',
+        },
+        'playlist_count': 1,
+        'playlist': [{
+            'md5': 'bf9d6efad221008ce71aea09d5533cf6',
+            'info_dict': {
+                'id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac',
+                'ext': 'mp3',
+                'title': '(Portfolio Profile) Sensel - With @IlyaRosenberg',
+                'description': 'md5:e1e4a970bce04290e0ba6f030b0125db',
+                'series': 'Techmeme Ride Home',
+                'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b',
+                'upload_date': '20220108',
+                'timestamp': 1641656064,
+                'modified_date': '20230418',
+                'episode_id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac',
+                'modified_timestamp': 1681843318,
+                'release_date': '20220108',
+                'release_timestamp': 1641672000,
+                'duration': 2789.38122,
+                'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$',
+            },
+        }],
+    }, {
+        'url': 'https://www.ridehome.info/show/spacecasts/big-tech-news-apples-macbook-pro-event/',
+        'info_dict': {
+            'id': 'big-tech-news-apples-macbook-pro-event',
+        },
+        'playlist_count': 1,
+        'playlist': [{
+            'md5': 'b1428530c6e03904a8271e978007fc05',
+            'info_dict': {
+                'id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7',
+                'ext': 'mp3',
+                'title': 'md5:e6c05d44d59b6577a4145ac339de5040',
+                'description': 'md5:14152f7228c8a301a77e3d6bc891b145',
+                'series': 'SpaceCasts',
+                'series_id': '8e3e837d-7fe0-4a23-8e11-894917e07e17',
+                'upload_date': '20211026',
+                'timestamp': 1635271450,
+                'modified_date': '20230502',
+                'episode_id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7',
+                'modified_timestamp': 1683057500,
+                'release_date': '20211026',
+                'release_timestamp': 1635272124,
+                'duration': 2266.30531,
+                'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$',
+            },
+        }],
+    }]
+
+    def _real_extract(self, url):  # builds a playlist from the Art19 player URLs embedded in the page
+        article_id = self._match_id(url)
+        webpage = self._download_webpage(url, article_id)
+
+        urls = traverse_obj(  # `data-src` values of `iframeContainer` elements that Art19IE accepts
+            get_elements_html_by_class('iframeContainer', webpage),
+            (..., {extract_attributes}, lambda k, v: k == 'data-src' and Art19IE.suitable(v)))
+        return self.playlist_from_matches(urls, article_id, ie=Art19IE)
diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py
index c491aaf53..b0b92e642 100644
--- a/yt_dlp/extractor/rockstargames.py
+++ b/yt_dlp/extractor/rockstargames.py
@@ -6,6 +6,7 @@ from ..utils import (
class RockstarGamesIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P\d+)'
_TESTS = [{
'url': 'https://www.rockstargames.com/videos/video/11544/',
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index cad76f0c9..5099f3ae4 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -38,7 +38,7 @@ class RokfinIE(InfoExtractor):
'upload_date': '20211023',
'timestamp': 1634998029,
'channel': 'Jimmy Dore',
- 'channel_id': 65429,
+ 'channel_id': '65429',
'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
'availability': 'public',
'live_status': 'not_live',
@@ -56,7 +56,7 @@ class RokfinIE(InfoExtractor):
'upload_date': '20190412',
'timestamp': 1555052644,
'channel': 'Ron Placone',
- 'channel_id': 10,
+ 'channel_id': '10',
'channel_url': 'https://rokfin.com/RonPlacone',
'availability': 'public',
'live_status': 'not_live',
@@ -73,7 +73,7 @@ class RokfinIE(InfoExtractor):
'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
'description': 'md5:324ce2d3e3b62e659506409e458b9d8e',
'channel': 'TLAVagabond',
- 'channel_id': 53856,
+ 'channel_id': '53856',
'channel_url': 'https://rokfin.com/TLAVagabond',
'availability': 'public',
'is_live': False,
@@ -86,7 +86,6 @@ class RokfinIE(InfoExtractor):
'dislike_count': int,
'like_count': int,
'tags': ['FreeThinkingMedia^'],
- 'duration': None,
}
}, {
'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer',
@@ -96,7 +95,7 @@ class RokfinIE(InfoExtractor):
'title': 'Brave New World - Aldous Huxley DEEPDIVE! (Chpts 1-3) - Quite Frankly & Jay Dyer',
'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
'channel': 'Jay Dyer',
- 'channel_id': 186881,
+ 'channel_id': '186881',
'channel_url': 'https://rokfin.com/jaydyer',
'availability': 'premium_only',
'live_status': 'not_live',
@@ -116,7 +115,7 @@ class RokfinIE(InfoExtractor):
'title': 'The Grayzone live on Nordstream blame game',
'thumbnail': r're:https://image\.v\.rokfin\.com/.+',
'channel': 'Max Blumenthal',
- 'channel_id': 248902,
+ 'channel_id': '248902',
'channel_url': 'https://rokfin.com/MaxBlumenthal',
'availability': 'premium_only',
'live_status': 'was_live',
@@ -174,7 +173,7 @@ class RokfinIE(InfoExtractor):
'like_count': int_or_none(metadata.get('likeCount')),
'dislike_count': int_or_none(metadata.get('dislikeCount')),
'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))),
- 'channel_id': traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id')),
+ 'channel_id': str_or_none(traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id'))),
'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None,
'timestamp': timestamp,
'release_timestamp': timestamp if live_status != 'not_live' else None,
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index 94e673b13..e19a85d06 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -2,16 +2,17 @@ from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ LazyList,
int_or_none,
join_nonempty,
- LazyList,
+ parse_iso8601,
parse_qs,
str_or_none,
traverse_obj,
+ update_url_query,
url_or_none,
urlencode_postdata,
urljoin,
- update_url_query,
)
@@ -70,6 +71,7 @@ class RoosterTeethBaseIE(InfoExtractor):
'episode_id': str_or_none(data.get('uuid')),
'channel_id': attributes.get('channel_id'),
'duration': int_or_none(attributes.get('length')),
+ 'release_timestamp': parse_iso8601(attributes.get('original_air_date')),
'thumbnails': thumbnails,
'availability': self._availability(
needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only,
@@ -91,6 +93,17 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'thumbnail': r're:^https?://.*\.png$',
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
+ 'tags': ['Game Show', 'Sketch'],
+ 'season_number': 2,
+ 'availability': 'public',
+ 'episode_number': 10,
+ 'episode_id': '00374575-464e-11e7-a302-065410f210c4',
+ 'season': 'Season 2',
+ 'season_id': 'ffa27d48-464d-11e7-a302-065410f210c4',
+ 'channel_id': '92b6bb21-91d2-4b1b-bf95-3268fa0d9939',
+ 'duration': 145,
+ 'release_timestamp': 1462982400,
+ 'release_date': '20160511',
},
'params': {'skip_download': True},
}, {
@@ -104,6 +117,42 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
'ext': 'mp4',
+ 'availability': 'public',
+ 'episode_id': 'f8117b13-f068-499e-803e-eec9ea2dec8c',
+ 'episode_number': 3,
+ 'tags': ['Animation'],
+ 'season_id': '4b8f0a9e-12c4-41ed-8caa-fed15a85bab8',
+ 'season': 'Season 1',
+ 'series': 'RWBY: World of Remnant',
+ 'season_number': 1,
+ 'duration': 216,
+ 'release_timestamp': 1413489600,
+ 'release_date': '20141016',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ # only works with video_data['attributes']['url'] m3u8 url
+ 'url': 'https://www.roosterteeth.com/watch/achievement-hunter-achievement-hunter-fatality-walkthrough-deathstroke-lex-luthor-captain-marvel-green-lantern-and-wonder-woman',
+ 'info_dict': {
+ 'id': '25394',
+ 'ext': 'mp4',
+ 'title': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
+ 'description': 'md5:91bb934698344fb9647b1c7351f16964',
+ 'availability': 'public',
+ 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
+ 'episode': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman',
+ 'episode_number': 71,
+ 'episode_id': 'ffaec998-464d-11e7-a302-065410f210c4',
+ 'season': 'Season 2008',
+ 'tags': ['Gaming'],
+ 'series': 'Achievement Hunter',
+ 'display_id': 'md5:4465ce4f001735f9d7a2ae529a543d31',
+ 'season_id': 'ffa13340-464d-11e7-a302-065410f210c4',
+ 'season_number': 2008,
+ 'channel_id': '2cb2a70c-be50-46f5-93d7-84a1baabb4f7',
+ 'duration': 189,
+ 'release_timestamp': 1228317300,
+ 'release_date': '20081203',
},
'params': {'skip_download': True},
}, {
@@ -133,10 +182,10 @@ class RoosterTeethIE(RoosterTeethBaseIE):
try:
video_data = self._download_json(
- api_episode_url + '/videos', display_id,
- 'Downloading video JSON metadata')['data'][0]
+ api_episode_url + '/videos', display_id, 'Downloading video JSON metadata',
+ headers={'Client-Type': 'web'})['data'][0] # web client-type yields ad-free streams
m3u8_url = video_data['attributes']['url']
- # XXX: additional URL at video_data['links']['download']
+ # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False:
diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py
index 63134322d..411a62519 100644
--- a/yt_dlp/extractor/rozhlas.py
+++ b/yt_dlp/extractor/rozhlas.py
@@ -247,17 +247,17 @@ class MujRozhlasIE(RozhlasBaseIE):
'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
'md5': '6f8fd68663e64936623e67c152a669e0',
'info_dict': {
- 'id': '10739193',
+ 'id': '10787730',
'ext': 'mp3',
'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
'timestamp': 1684915200,
- 'modified_timestamp': 1684922446,
+ 'modified_timestamp': 1687550432,
'series': 'Vykopávky',
'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
'channel_id': 'radio-wave',
'upload_date': '20230524',
- 'modified_date': '20230524',
+ 'modified_date': '20230623',
},
}, {
# serial extraction
@@ -277,6 +277,26 @@ class MujRozhlasIE(RozhlasBaseIE):
'title': 'Nespavci',
'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
},
+ }, {
+ # serialPart
+ 'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
+ 'info_dict': {
+ 'id': '8889035',
+ 'ext': 'm4a',
+ 'title': 'Gustavo Adolfo Bécquer: Hora duchů',
+ 'description': 'md5:343a15257b376c276e210b78e900ffea',
+ 'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
+ 'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
+ 'timestamp': 1708173000,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ 'series': 'Povídka',
+ 'modified_date': '20240217',
+ 'upload_date': '20240217',
+ 'modified_timestamp': 1708173198,
+ 'channel_id': 'vltava',
+ },
+ 'params': {'skip_download': 'dash'},
}]
def _call_api(self, path, item_id, msg='API JSON'):
@@ -322,7 +342,7 @@ class MujRozhlasIE(RozhlasBaseIE):
entity = info['siteEntityBundle']
- if entity == 'episode':
+ if entity in ('episode', 'serialPart'):
return self._extract_audio_entry(self._call_api(
'episodes', info['contentId'], 'episode info API JSON'))
diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py
index 9f73d1811..bce5cba82 100644
--- a/yt_dlp/extractor/rts.py
+++ b/yt_dlp/extractor/rts.py
@@ -13,6 +13,7 @@ from ..utils import (
class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
IE_DESC = 'RTS.ch'
_VALID_URL = r'rts:(?P\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P[0-9]+)-(?P.+?)\.html'
diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py
index 85ad7e2ff..11095b262 100644
--- a/yt_dlp/extractor/rule34video.py
+++ b/yt_dlp/extractor/rule34video.py
@@ -9,7 +9,6 @@ from ..utils import (
get_element_html_by_class,
get_elements_by_class,
int_or_none,
- join_nonempty,
parse_count,
parse_duration,
unescapeHTML,
@@ -57,7 +56,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int,
'timestamp': 1640131200,
'description': '',
- 'creator': 'WildeerStudio',
+ 'creators': ['WildeerStudio'],
'upload_date': '20211222',
'uploader': 'CerZule',
'uploader_url': 'https://rule34video.com/members/36281/',
@@ -81,13 +80,13 @@ class Rule34VideoIE(InfoExtractor):
'quality': quality,
})
- categories, creator, uploader, uploader_url = [None] * 4
+ categories, creators, uploader, uploader_url = [None] * 4
for col in get_elements_by_class('col', webpage):
label = clean_html(get_element_by_class('label', col))
if label == 'Categories:':
categories = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Artist:':
- creator = join_nonempty(*map(clean_html, get_elements_by_class('item', col)), delim=', ')
+ creators = list(map(clean_html, get_elements_by_class('item', col)))
elif label == 'Uploaded By:':
uploader = clean_html(get_element_by_class('name', col))
uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')
@@ -115,7 +114,7 @@ class Rule34VideoIE(InfoExtractor):
'comment_count': int_or_none(self._search_regex(
r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
'age_limit': 18,
- 'creator': creator,
+ 'creators': creators,
'uploader': uploader,
'uploader_url': uploader_url,
'categories': categories,
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 1dc049ac8..837a324e6 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -90,7 +90,6 @@ class RumbleEmbedIE(InfoExtractor):
'channel_url': 'https://rumble.com/c/LofiGirl',
'channel': 'Lofi Girl',
'thumbnail': r're:https://.+\.jpg',
- 'duration': None,
'uploader': 'Lofi Girl',
'live_status': 'is_live',
},
diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py
index 08d9b9257..287824d08 100644
--- a/yt_dlp/extractor/rutube.py
+++ b/yt_dlp/extractor/rutube.py
@@ -46,7 +46,7 @@ class RutubeBaseIE(InfoExtractor):
'uploader': try_get(video, lambda x: x['author']['name']),
'uploader_id': compat_str(uploader_id) if uploader_id else None,
'timestamp': unified_timestamp(video.get('created_ts')),
- 'category': [category] if category else None,
+ 'categories': [category] if category else None,
'age_limit': age_limit,
'view_count': int_or_none(video.get('hits')),
'comment_count': int_or_none(video.get('comments_count')),
@@ -112,7 +112,7 @@ class RutubeIE(RutubeBaseIE):
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
- 'category': ['Новости и СМИ'],
+ 'categories': ['Новости и СМИ'],
'chapters': [],
},
'expected_warnings': ['Unable to download f4m'],
@@ -144,7 +144,7 @@ class RutubeIE(RutubeBaseIE):
'age_limit': 0,
'view_count': int,
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
- 'category': ['Видеоигры'],
+ 'categories': ['Видеоигры'],
'chapters': [],
},
'expected_warnings': ['Unable to download f4m'],
@@ -154,7 +154,7 @@ class RutubeIE(RutubeBaseIE):
'id': 'c65b465ad0c98c89f3b25cb03dcc87c6',
'ext': 'mp4',
'chapters': 'count:4',
- 'category': ['Бизнес и предпринимательство'],
+ 'categories': ['Бизнес и предпринимательство'],
'description': 'md5:252feac1305257d8c1bab215cedde75d',
'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
'duration': 782,
diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py
index d2f60e92f..a5f05e1d0 100644
--- a/yt_dlp/extractor/saitosan.py
+++ b/yt_dlp/extractor/saitosan.py
@@ -3,6 +3,7 @@ from ..utils import ExtractorError, try_get
class SaitosanIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'Saitosan'
_VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P[0-9]+)'
_TESTS = [{
diff --git a/yt_dlp/extractor/savefrom.py b/yt_dlp/extractor/savefrom.py
deleted file mode 100644
index 9c9e74b6d..000000000
--- a/yt_dlp/extractor/savefrom.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import os.path
-
-from .common import InfoExtractor
-
-
-class SaveFromIE(InfoExtractor):
- IE_NAME = 'savefrom.net'
- _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$'
-
- _TEST = {
- 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com',
- 'info_dict': {
- 'id': 'UlVRAPW2WJY',
- 'ext': 'mp4',
- 'title': 'About Team Radical MMA | MMA Fighting',
- 'upload_date': '20120816',
- 'uploader': 'Howcast',
- 'uploader_id': 'Howcast',
- 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*',
- },
- 'params': {
- 'skip_download': True
- }
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = os.path.splitext(url.split('/')[-1])[0]
-
- return self.url_result(mobj.group('url'), video_id=video_id)
diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py
index 7a9115047..8d61e22fc 100644
--- a/yt_dlp/extractor/sbs.py
+++ b/yt_dlp/extractor/sbs.py
@@ -44,8 +44,6 @@ class SBSIE(InfoExtractor):
'timestamp': 1408613220,
'upload_date': '20140821',
'uploader': 'SBSC',
- 'tags': None,
- 'categories': None,
},
'expected_warnings': ['Unable to download JSON metadata'],
}, {
diff --git a/yt_dlp/extractor/seeker.py b/yt_dlp/extractor/seeker.py
deleted file mode 100644
index 65eb16a09..000000000
--- a/yt_dlp/extractor/seeker.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- get_element_by_class,
- strip_or_none,
-)
-
-
-class SeekerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P.*)-(?P\d+)\.html'
- _TESTS = [{
- 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
- 'md5': '897d44bbe0d8986a2ead96de565a92db',
- 'info_dict': {
- 'id': 'Elrn3gnY',
- 'ext': 'mp4',
- 'title': 'Should Trump Be Required To Release His Tax Returns?',
- 'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
- 'timestamp': 1490090165,
- 'upload_date': '20170321',
- }
- }, {
- 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
- 'playlist': [
- {
- 'md5': '0497b9f20495174be73ae136949707d2',
- 'info_dict': {
- 'id': 'FihYQ8AE',
- 'ext': 'mp4',
- 'title': 'The Pros & Cons Of Zoos',
- 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
- 'timestamp': 1490039133,
- 'upload_date': '20170320',
- },
- }
- ],
- 'info_dict': {
- 'id': '1834116536',
- 'title': 'After Gorilla Killing, Changes Ahead for Zoos',
- 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
- },
- }]
-
- def _real_extract(self, url):
- display_id, article_id = self._match_valid_url(url).groups()
- webpage = self._download_webpage(url, display_id)
- entries = []
- for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
- entries.append(self.url_result(
- 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
- return self.playlist_result(
- entries, article_id,
- self._og_search_title(webpage),
- strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/senalcolombia.py b/yt_dlp/extractor/senalcolombia.py
index f3c066da7..b2f354fae 100644
--- a/yt_dlp/extractor/senalcolombia.py
+++ b/yt_dlp/extractor/senalcolombia.py
@@ -3,6 +3,7 @@ from .rtvcplay import RTVCKalturaIE
class SenalColombiaLiveIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?Psenal-en-vivo)'
_TESTS = [{
diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py
index 3600e2e74..1ecea71fc 100644
--- a/yt_dlp/extractor/sendtonews.py
+++ b/yt_dlp/extractor/sendtonews.py
@@ -12,6 +12,7 @@ from ..utils import (
class SendtoNewsIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P[0-9A-Za-z-]+)'
_TEST = {
diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py
index 3117f81e3..989b63c72 100644
--- a/yt_dlp/extractor/sexu.py
+++ b/yt_dlp/extractor/sexu.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class SexuIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P\d+)'
_TEST = {
'url': 'http://sexu.com/961791/',
diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py
index e02f8cef0..54dfdc441 100644
--- a/yt_dlp/extractor/skeb.py
+++ b/yt_dlp/extractor/skeb.py
@@ -10,7 +10,7 @@ class SkebIE(InfoExtractor):
'info_dict': {
'id': '466853',
'title': '内容はおまかせします! by 姫ノ森りぃる@一周年',
- 'descripion': 'md5:1ec50901efc3437cfbfe3790468d532d',
+ 'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
'uploader': '姫ノ森りぃる@一周年',
'uploader_id': 'riiru_wm',
'age_limit': 0,
@@ -34,7 +34,7 @@ class SkebIE(InfoExtractor):
'info_dict': {
'id': '489408',
'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
- 'descripion': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
+ 'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
'uploader': '古川ノブ@音楽とVlogのVtuber',
'uploader_id': 'furukawa_nob',
'age_limit': 0,
@@ -61,12 +61,12 @@ class SkebIE(InfoExtractor):
'info_dict': {
'id': '6',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
- 'descripion': 'md5:aa6cbf2ba320b50bce219632de195f07',
+ 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
'_type': 'playlist',
'entries': [{
'id': '486430',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
- 'descripion': 'md5:aa6cbf2ba320b50bce219632de195f07',
+ 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
}, {
'id': '486431',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
@@ -81,7 +81,7 @@ class SkebIE(InfoExtractor):
parent = {
'id': video_id,
'title': nuxt_data.get('title'),
- 'descripion': nuxt_data.get('description'),
+ 'description': nuxt_data.get('description'),
'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
'age_limit': 18 if nuxt_data.get('nsfw') else 0,
diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py
index 4292bb2ae..197407c18 100644
--- a/yt_dlp/extractor/skylinewebcams.py
+++ b/yt_dlp/extractor/skylinewebcams.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class SkylineWebcamsIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P[^/]+)\.html'
_TEST = {
'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html',
diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py
index 6264b04bb..867782778 100644
--- a/yt_dlp/extractor/skynewsarabia.py
+++ b/yt_dlp/extractor/skynewsarabia.py
@@ -38,6 +38,7 @@ class SkyNewsArabiaBaseIE(InfoExtractor):
class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
+ _WORKING = False
IE_NAME = 'skynewsarabia:video'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P[0-9]+)'
_TEST = {
@@ -64,6 +65,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE):
class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE):
+ _WORKING = False
IE_NAME = 'skynewsarabia:article'
_VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P[0-9]+)'
_TESTS = [{
diff --git a/yt_dlp/extractor/stageplus.py b/yt_dlp/extractor/stageplus.py
index 4bed4d646..77e4362fc 100644
--- a/yt_dlp/extractor/stageplus.py
+++ b/yt_dlp/extractor/stageplus.py
@@ -21,7 +21,7 @@ class StagePlusVODConcertIE(InfoExtractor):
'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
'description': 'md5:50f78ec180518c9bdb876bac550996fc',
- 'artist': ['Yuja Wang', 'Lorenzo Viotti'],
+ 'artists': ['Yuja Wang', 'Lorenzo Viotti'],
'upload_date': '20230331',
'timestamp': 1680249600,
'release_date': '20210709',
@@ -40,10 +40,10 @@ class StagePlusVODConcertIE(InfoExtractor):
'release_timestamp': 1625788800,
'duration': 2207,
'chapters': 'count:5',
- 'artist': ['Yuja Wang'],
- 'composer': ['Sergei Rachmaninoff'],
+ 'artists': ['Yuja Wang'],
+ 'composers': ['Sergei Rachmaninoff'],
'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
- 'album_artist': ['Yuja Wang', 'Lorenzo Viotti'],
+ 'album_artists': ['Yuja Wang', 'Lorenzo Viotti'],
'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
'track_number': 1,
'genre': 'Instrumental Concerto',
@@ -474,7 +474,7 @@ fragment BannerFields on Banner {
metadata = traverse_obj(data, {
'title': 'title',
'description': ('shortDescription', {str}),
- 'artist': ('artists', 'edges', ..., 'node', 'name'),
+ 'artists': ('artists', 'edges', ..., 'node', 'name'),
'timestamp': ('archiveReleaseDate', {unified_timestamp}),
'release_timestamp': ('productionDate', {unified_timestamp}),
})
@@ -494,7 +494,7 @@ fragment BannerFields on Banner {
'formats': formats,
'subtitles': subtitles,
'album': metadata.get('title'),
- 'album_artist': metadata.get('artist'),
+ 'album_artists': metadata.get('artists'),
'track_number': idx,
**metadata,
**traverse_obj(video, {
@@ -506,8 +506,8 @@ fragment BannerFields on Banner {
'title': 'title',
'start_time': ('mark', {float_or_none}),
}),
- 'artist': ('artists', 'edges', ..., 'node', 'name'),
- 'composer': ('work', 'composers', ..., 'name'),
+ 'artists': ('artists', 'edges', ..., 'node', 'name'),
+ 'composers': ('work', 'composers', ..., 'name'),
'genre': ('work', 'genre', 'title'),
}),
})
diff --git a/yt_dlp/extractor/startrek.py b/yt_dlp/extractor/startrek.py
index e92122f9b..94efb589c 100644
--- a/yt_dlp/extractor/startrek.py
+++ b/yt_dlp/extractor/startrek.py
@@ -3,6 +3,7 @@ from ..utils import int_or_none, urljoin
class StarTrekIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'(?Phttps?://(?:intl|www)\.startrek\.com)/videos/(?P[^/]+)'
_TESTS = [{
'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index 7daee2fe0..63da9662a 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -2,9 +2,10 @@ import re
from .common import InfoExtractor
from ..utils import (
- extract_attributes,
ExtractorError,
+ extract_attributes,
get_element_by_class,
+ str_or_none,
)
@@ -30,7 +31,6 @@ class SteamIE(InfoExtractor):
'ext': 'mp4',
'title': 'Terraria video 256785003',
'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
- 'n_entries': 2,
}
},
{
@@ -39,9 +39,7 @@ class SteamIE(InfoExtractor):
'id': '2040428',
'ext': 'mp4',
'title': 'Terraria video 2040428',
- 'playlist_index': 2,
'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
- 'n_entries': 2,
}
}
],
@@ -55,12 +53,10 @@ class SteamIE(InfoExtractor):
}, {
'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
'info_dict': {
- 'id': '256757115',
- 'title': 'Grand Theft Auto V video 256757115',
- 'ext': 'mp4',
- 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
- 'n_entries': 20,
+ 'id': '271590',
+ 'title': 'Grand Theft Auto V',
},
+ 'playlist_count': 23,
}]
def _real_extract(self, url):
@@ -136,7 +132,7 @@ class SteamCommunityBroadcastIE(InfoExtractor):
'id': '76561199073851486',
'title': r're:Steam Community :: pepperm!nt :: Broadcast 2022-06-26 \d{2}:\d{2}',
'ext': 'mp4',
- 'uploader_id': 1113585758,
+ 'uploader_id': '1113585758',
'uploader': 'pepperm!nt',
'live_status': 'is_live',
},
@@ -169,6 +165,6 @@ class SteamCommunityBroadcastIE(InfoExtractor):
'live_status': 'is_live',
'view_count': json_data.get('num_view'),
'uploader': uploader_json.get('persona_name'),
- 'uploader_id': uploader_json.get('accountid'),
+ 'uploader_id': str_or_none(uploader_json.get('accountid')),
'subtitles': subs,
}
diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py
deleted file mode 100644
index 93c42942c..000000000
--- a/yt_dlp/extractor/streamff.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from .common import InfoExtractor
-from ..utils import int_or_none, parse_iso8601
-
-
-class StreamFFIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?streamff\.com/v/(?P<id>[a-zA-Z0-9]+)'
-
- _TESTS = [{
- 'url': 'https://streamff.com/v/55cc94',
- 'md5': '8745a67bb5e5c570738efe7983826370',
- 'info_dict': {
- 'id': '55cc94',
- 'ext': 'mp4',
- 'title': '55cc94',
- 'timestamp': 1634764643,
- 'upload_date': '20211020',
- 'view_count': int,
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- json_data = self._download_json(f'https://streamff.com/api/videos/{video_id}', video_id)
- return {
- 'id': video_id,
- 'title': json_data.get('name') or video_id,
- 'url': 'https://streamff.com/%s' % json_data['videoLink'],
- 'view_count': int_or_none(json_data.get('views')),
- 'timestamp': parse_iso8601(json_data.get('date')),
- }
diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py
index 6e216a2a5..aeaff28f2 100644
--- a/yt_dlp/extractor/swearnet.py
+++ b/yt_dlp/extractor/swearnet.py
@@ -1,5 +1,5 @@
from .common import InfoExtractor
-from ..utils import int_or_none, traverse_obj
+from ..utils import ExtractorError, int_or_none, traverse_obj
class SwearnetEpisodeIE(InfoExtractor):
@@ -51,7 +51,13 @@ class SwearnetEpisodeIE(InfoExtractor):
display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num')
webpage = self._download_webpage(url, display_id)
- external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+ try:
+ external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid')
+ except ExtractorError:
+ if 'Upgrade Now' in webpage:
+ self.raise_login_required()
+ raise
+
json_data = self._download_json(
f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0]
diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py
index afcdbf780..bd2d73842 100644
--- a/yt_dlp/extractor/syfy.py
+++ b/yt_dlp/extractor/syfy.py
@@ -6,6 +6,7 @@ from ..utils import (
class SyfyIE(AdobePassIE):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P[^/?#]+)'
_TESTS = [{
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py
index e23b490b0..c69c13d0b 100644
--- a/yt_dlp/extractor/tagesschau.py
+++ b/yt_dlp/extractor/tagesschau.py
@@ -12,6 +12,7 @@ from ..utils import (
class TagesschauIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P[^/]+/(?:[^/]+/)*?(?P[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
_TESTS = [{
diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py
index 67e544a6a..d4c5b41a7 100644
--- a/yt_dlp/extractor/tass.py
+++ b/yt_dlp/extractor/tass.py
@@ -8,6 +8,7 @@ from ..utils import (
class TassIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P\d+)'
_TESTS = [
{
diff --git a/yt_dlp/extractor/tdslifeway.py b/yt_dlp/extractor/tdslifeway.py
deleted file mode 100644
index 3623a68c8..000000000
--- a/yt_dlp/extractor/tdslifeway.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from .common import InfoExtractor
-
-
-class TDSLifewayIE(InfoExtractor):
- _VALID_URL = r'https?://tds\.lifeway\.com/v1/trainingdeliverysystem/courses/(?P\d+)/index\.html'
-
- _TEST = {
- # From http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers
- 'url': 'http://tds.lifeway.com/v1/trainingdeliverysystem/courses/3453494717001/index.html?externalRegistration=AssetId%7C34F466F1-78F3-4619-B2AB-A8EFFA55E9E9%21InstanceId%7C0%21UserId%7Caaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa&grouping=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&activity_id=http%3A%2F%2Flifeway.com%2Fvideo%2F3453494717001&content_endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2Fcontent%2F&actor=%7B%22name%22%3A%5B%22Guest%20Guest%22%5D%2C%22account%22%3A%5B%7B%22accountServiceHomePage%22%3A%22http%3A%2F%2Fscorm.lifeway.com%2F%22%2C%22accountName%22%3A%22aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa%22%7D%5D%2C%22objectType%22%3A%22Agent%22%7D&content_token=462a50b2-b6f9-4970-99b1-930882c499fb®istration=93d6ec8e-7f7b-4ed3-bbc8-a857913c0b2a&externalConfiguration=access%7CFREE%21adLength%7C-1%21assignOrgId%7C4AE36F78-299A-425D-91EF-E14A899B725F%21assignOrgParentId%7C%21courseId%7C%21isAnonymous%7Cfalse%21previewAsset%7Cfalse%21previewLength%7C-1%21previewMode%7Cfalse%21royalty%7CFREE%21sessionId%7C671422F9-8E79-48D4-9C2C-4EE6111EA1CD%21trackId%7C&auth=Basic%20OjhmZjk5MDBmLTBlYTMtNDJhYS04YjFlLWE4MWQ3NGNkOGRjYw%3D%3D&endpoint=http%3A%2F%2Ftds.lifeway.com%2Fv1%2Ftrainingdeliverysystem%2FScormEngineInterface%2FTCAPI%2F',
- 'info_dict': {
- 'id': '3453494717001',
- 'ext': 'mp4',
- 'title': 'The Gospel by Numbers',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'upload_date': '20140410',
- 'description': 'Coming soon from T4G 2014!',
- 'uploader_id': '2034960640001',
- 'timestamp': 1397145591,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['BrightcoveNew'],
- }
-
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/2034960640001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- brightcove_id = self._match_id(url)
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py
index 01906bda9..5eac9aa3f 100644
--- a/yt_dlp/extractor/teachable.py
+++ b/yt_dlp/extractor/teachable.py
@@ -99,6 +99,7 @@ class TeachableBaseIE(InfoExtractor):
class TeachableIE(TeachableBaseIE):
+ _WORKING = False
_VALID_URL = r'''(?x)
(?:
%shttps?://(?P[^/]+)|
diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py
index c3eec2784..90a976297 100644
--- a/yt_dlp/extractor/teachertube.py
+++ b/yt_dlp/extractor/teachertube.py
@@ -9,6 +9,7 @@ from ..utils import (
class TeacherTubeIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'teachertube'
IE_DESC = 'teachertube.com videos'
@@ -87,6 +88,7 @@ class TeacherTubeIE(InfoExtractor):
class TeacherTubeUserIE(InfoExtractor):
+ _WORKING = False
IE_NAME = 'teachertube:user:collection'
IE_DESC = 'teachertube.com user and collection videos'
diff --git a/yt_dlp/extractor/teachingchannel.py b/yt_dlp/extractor/teachingchannel.py
index 275f6d1f9..5791292a9 100644
--- a/yt_dlp/extractor/teachingchannel.py
+++ b/yt_dlp/extractor/teachingchannel.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
class TeachingChannelIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P[^/?]+)'
_TEST = {
diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py
index 9260db2b4..72f67e402 100644
--- a/yt_dlp/extractor/tele5.py
+++ b/yt_dlp/extractor/tele5.py
@@ -7,6 +7,7 @@ from ..utils import (
class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P[^/?#&]+)'
_GEO_COUNTRIES = ['DE']
_TESTS = [{
diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py
index 3d29dace3..a71b14c27 100644
--- a/yt_dlp/extractor/telemb.py
+++ b/yt_dlp/extractor/telemb.py
@@ -5,6 +5,7 @@ from ..utils import remove_start
class TeleMBIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P.+?)_d_(?P\d+)\.html'
_TESTS = [
{
diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py
index 54e74a6c0..84b24dead 100644
--- a/yt_dlp/extractor/telemundo.py
+++ b/yt_dlp/extractor/telemundo.py
@@ -4,7 +4,7 @@ from ..utils import try_get, unified_timestamp
class TelemundoIE(InfoExtractor):
-
+ _WORKING = False
_VALID_URL = r'https?:\/\/(?:www\.)?telemundo\.com\/.+?video\/[^\/]+(?Ptmvo\d{7})'
_TESTS = [{
'url': 'https://www.telemundo.com/noticias/noticias-telemundo-en-la-noche/empleo/video/esta-aplicacion-gratuita-esta-ayudando-los-latinos-encontrar-trabajo-en-estados-unidos-tmvo9829325',
diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py
index a73dd68fb..fd831f580 100644
--- a/yt_dlp/extractor/teletask.py
+++ b/yt_dlp/extractor/teletask.py
@@ -5,6 +5,7 @@ from ..utils import unified_strdate
class TeleTaskIE(InfoExtractor):
+ _WORKING = False
_VALID_URL = r'https?://(?:www\.)?tele-task\.de/archive/video/html5/(?P[0-9]+)'
_TEST = {
'url': 'http://www.tele-task.de/archive/video/html5/26168/',
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py
index 7ce7cbf84..a98275d86 100644
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -20,7 +20,8 @@ class TenPlayIE(InfoExtractor):
'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
'duration': 186,
- 'season': 39,
+ 'season': 'Season 39',
+ 'season_number': 39,
'series': 'Neighbours',
'thumbnail': r're:https://.*\.jpg',
'uploader': 'Channel 10',
@@ -108,7 +109,7 @@ class TenPlayIE(InfoExtractor):
'description': data.get('description'),
'age_limit': self._AUS_AGES.get(data.get('classification')),
'series': data.get('tvShow'),
- 'season': int_or_none(data.get('season')),
+ 'season_number': int_or_none(data.get('season')),
'episode_number': int_or_none(data.get('episode')),
'timestamp': data.get('published'),
'thumbnail': data.get('imageUrl'),
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index f26972cff..aa8356796 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -6,7 +6,7 @@ import string
import time
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
+from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
@@ -15,7 +15,6 @@ from ..utils import (
UserNotLive,
determine_ext,
format_field,
- get_first,
int_or_none,
join_nonempty,
merge_dicts,
@@ -51,7 +50,13 @@ class TikTokBaseIE(InfoExtractor):
def _get_sigi_state(self, webpage, display_id):
return self._search_json(
r'')
+ 'sigi state', display_id, end_pattern=r'', default={})
+
+ def _get_universal_data(self, webpage, display_id):
+ return traverse_obj(self._search_json(
+ r'', default={}),
+ ('__DEFAULT_SCOPE__', {dict})) or {}
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
@@ -219,8 +224,8 @@ class TikTokBaseIE(InfoExtractor):
def extract_addr(addr, add_meta={}):
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
if res:
- known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height') or addr.get('height'))
- known_resolutions[res].setdefault('width', add_meta.get('width') or addr.get('width'))
+ known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
+ known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
parsed_meta.update(known_resolutions.get(res, {}))
add_meta.setdefault('height', int_or_none(res[:-1]))
return [{
@@ -237,22 +242,26 @@ class TikTokBaseIE(InfoExtractor):
# Hack: Add direct video links first to prioritize them when removing duplicate formats
formats = []
+ width = int_or_none(video_info.get('width'))
+ height = int_or_none(video_info.get('height'))
if video_info.get('play_addr'):
formats.extend(extract_addr(video_info['play_addr'], {
'format_id': 'play_addr',
'format_note': 'Direct video',
'vcodec': 'h265' if traverse_obj(
video_info, 'is_bytevc1', 'is_h265') else 'h264', # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
- 'width': video_info.get('width'),
- 'height': video_info.get('height'),
+ 'width': width,
+ 'height': height,
}))
if video_info.get('download_addr'):
- formats.extend(extract_addr(video_info['download_addr'], {
+ download_addr = video_info['download_addr']
+ dl_width = int_or_none(download_addr.get('width'))
+ formats.extend(extract_addr(download_addr, {
'format_id': 'download_addr',
'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''),
'vcodec': 'h264',
- 'width': video_info.get('width'),
- 'height': video_info.get('height'),
+ 'width': dl_width or width,
+ 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong
'preference': -2 if video_info.get('has_watermark') else -1,
}))
if video_info.get('play_addr_h264'):
@@ -311,13 +320,10 @@ class TikTokBaseIE(InfoExtractor):
if is_generic_og_trackname:
music_track, music_author = contained_music_track or 'original sound', contained_music_author
else:
- music_track, music_author = music_info.get('title'), music_info.get('author')
+ music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str}))
return {
'id': aweme_id,
- 'extractor_key': TikTokIE.ie_key(),
- 'extractor': TikTokIE.IE_NAME,
- 'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
**traverse_obj(aweme_detail, {
'title': ('desc', {str}),
'description': ('desc', {str}),
@@ -330,15 +336,16 @@ class TikTokBaseIE(InfoExtractor):
'comment_count': 'comment_count',
}, expected_type=int_or_none),
**traverse_obj(author_info, {
- 'uploader': 'unique_id',
- 'uploader_id': 'uid',
- 'creator': 'nickname',
- 'channel_id': 'sec_uid',
- }, expected_type=str_or_none),
+ 'uploader': ('unique_id', {str}),
+ 'uploader_id': ('uid', {str_or_none}),
+ 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
+ 'channel': ('nickname', {str}),
+ 'channel_id': ('sec_uid', {str}),
+ }),
'uploader_url': user_url,
'track': music_track,
'album': str_or_none(music_info.get('album')) or None,
- 'artist': music_author or None,
+ 'artists': re.split(r'(?:, | & )', music_author) if music_author else None,
'formats': formats,
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
'thumbnails': thumbnails,
@@ -399,7 +406,8 @@ class TikTokBaseIE(InfoExtractor):
'timestamp': ('createTime', {int_or_none}),
}),
**traverse_obj(author_info or aweme_detail, {
- 'creator': ('nickname', {str}),
+ 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat
+ 'channel': ('nickname', {str}),
'uploader': (('uniqueId', 'author'), {str}),
'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
}, get_all=False),
@@ -410,10 +418,10 @@ class TikTokBaseIE(InfoExtractor):
'comment_count': 'commentCount',
}, expected_type=int_or_none),
**traverse_obj(music_info, {
- 'track': 'title',
- 'album': ('album', {lambda x: x or None}),
- 'artist': 'authorName',
- }, expected_type=str),
+ 'track': ('title', {str}),
+ 'album': ('album', {str}, {lambda x: x or None}),
+ 'artists': ('authorName', {str}, {lambda x: [x] if x else None}),
+ }),
'channel_id': channel_id,
'uploader_url': user_url,
'formats': formats,
@@ -470,7 +478,8 @@ class TikTokIE(TikTokBaseIE):
'uploader_id': '18702747',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
- 'creator': 'patroX',
+ 'channel': 'patroX',
+ 'creators': ['patroX'],
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'upload_date': '20190930',
'timestamp': 1569860870,
@@ -478,7 +487,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
- 'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson',
+ 'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'],
'track': 'Big Fun',
},
}, {
@@ -490,12 +499,13 @@ class TikTokIE(TikTokBaseIE):
'title': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
'uploader': 'barudakhb_',
- 'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
+ 'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
+ 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'uploader_id': '6974687867511718913',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
'track': 'Boka Dance',
- 'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
+ 'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'],
'timestamp': 1626121503,
'duration': 18,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
@@ -514,7 +524,8 @@ class TikTokIE(TikTokBaseIE):
'title': 'Slap and Run!',
'description': 'Slap and Run!',
'uploader': 'user440922249',
- 'creator': 'Slap And Run',
+ 'channel': 'Slap And Run',
+ 'creators': ['Slap And Run'],
'uploader_id': '7036055384943690754',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
@@ -538,7 +549,8 @@ class TikTokIE(TikTokBaseIE):
'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
- 'creator': 'Pokemon',
+ 'channel': 'Pokemon',
+ 'creators': ['Pokemon'],
'uploader_id': '6820838815978423302',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
@@ -547,7 +559,7 @@ class TikTokIE(TikTokBaseIE):
'duration': 6,
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
'upload_date': '20220201',
- 'artist': 'Pokemon',
+ 'artists': ['Pokemon'],
'view_count': int,
'like_count': int,
'repost_count': int,
@@ -584,12 +596,13 @@ class TikTokIE(TikTokBaseIE):
'ext': 'mp3',
'title': 'TikTok video #7139980461132074283',
'description': '',
- 'creator': 'Antaura',
+ 'channel': 'Antaura',
+ 'creators': ['Antaura'],
'uploader': '_le_cannibale_',
'uploader_id': '6604511138619654149',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
- 'artist': 'nathan !',
+ 'artists': ['nathan !'],
'track': 'grahamscott canon',
'upload_date': '20220905',
'timestamp': 1662406249,
@@ -597,23 +610,24 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'repost_count': int,
'comment_count': int,
- 'thumbnail': r're:^https://.+\.webp',
+ 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
},
}, {
# only available via web
- 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662',
+ 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME
'md5': '6aba7fad816e8709ff2c149679ace165',
'info_dict': {
'id': '7206382937372134662',
'ext': 'mp4',
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
- 'creator': 'MoxyPatch',
+ 'channel': 'MoxyPatch',
+ 'creators': ['MoxyPatch'],
'uploader': 'moxypatch',
'uploader_id': '7039142049363379205',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
- 'artist': 'your worst nightmare',
+ 'artists': ['your worst nightmare'],
'track': 'original sound',
'upload_date': '20230303',
'timestamp': 1677866781,
@@ -628,7 +642,7 @@ class TikTokIE(TikTokBaseIE):
'expected_warnings': ['Unable to find video in feed'],
}, {
# 1080p format
- 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830',
+ 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME
'md5': '982512017a8a917124d5a08c8ae79621',
'info_dict': {
'id': '7107337212743830830',
@@ -639,8 +653,9 @@ class TikTokIE(TikTokBaseIE):
'uploader_id': '86328792343818240',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd',
- 'creator': 'tate mcrae',
- 'artist': 'tate mcrae',
+ 'channel': 'tate mcrae',
+ 'creators': ['tate mcrae'],
+ 'artists': ['tate mcrae'],
'track': 'original sound',
'upload_date': '20220609',
'timestamp': 1654805899,
@@ -651,7 +666,7 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
'thumbnail': r're:^https://.+\.webp',
},
- 'params': {'format': 'bytevc1_1080p_808907-0'},
+ 'skip': 'Unavailable via feed API, no formats available via web',
}, {
# Slideshow, audio-only m4a format
'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594',
@@ -665,8 +680,9 @@ class TikTokIE(TikTokBaseIE):
'uploader_id': '6582536342634676230',
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB',
- 'creator': 'лампочка',
- 'artist': 'Øneheart',
+ 'channel': 'лампочка',
+ 'creators': ['лампочка'],
+ 'artists': ['Øneheart'],
'album': 'watching the stars',
'track': 'watching the stars',
'upload_date': '20230708',
@@ -675,7 +691,7 @@ class TikTokIE(TikTokBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
- 'thumbnail': r're:^https://.+\.webp',
+ 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)',
},
}, {
# Auto-captions available
@@ -688,24 +704,35 @@ class TikTokIE(TikTokBaseIE):
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
+ e.expected = True
self.report_warning(f'{e}; trying with webpage')
url = self._create_url(user_id, video_id)
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
- next_data = self._search_nextjs_data(webpage, video_id, default='{}')
- if next_data:
- status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
- video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
+
+ if universal_data := self._get_universal_data(webpage, video_id):
+ self.write_debug('Found universal data for rehydration')
+ status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0
+ video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict}))
+
+ elif sigi_data := self._get_sigi_state(webpage, video_id):
+ self.write_debug('Found sigi state data')
+ status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
+ video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
+
+ elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
+ self.write_debug('Found next.js data')
+ status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
+ video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
+
else:
- sigi_data = self._get_sigi_state(webpage, video_id)
- status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0
- video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
+ raise ExtractorError('Unable to extract webpage video data')
- if status == 0:
+ if video_data and status == 0:
return self._parse_aweme_video_web(video_data, url, video_id)
elif status == 10216:
raise ExtractorError('This video is private', expected=True)
- raise ExtractorError('Video not available', video_id=video_id)
+ raise ExtractorError(f'Video not available, status code {status}', video_id=video_id)
class TikTokUserIE(TikTokBaseIE):
@@ -921,20 +948,23 @@ class DouyinIE(TikTokBaseIE):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P[0-9]+)'
_TESTS = [{
'url': 'https://www.douyin.com/video/6961737553342991651',
- 'md5': 'a97db7e3e67eb57bf40735c022ffa228',
+ 'md5': '9ecce7bc5b302601018ecb2871c63a75',
'info_dict': {
'id': '6961737553342991651',
'ext': 'mp4',
'title': '#杨超越 小小水手带你去远航❤️',
'description': '#杨超越 小小水手带你去远航❤️',
+ 'uploader': '6897520xka',
'uploader_id': '110403406559',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
- 'creator': '杨超越',
- 'duration': 19782,
+ 'channel': '杨超越',
+ 'creators': ['杨超越'],
+ 'duration': 19,
'timestamp': 1620905839,
'upload_date': '20210513',
'track': '@杨超越创作的原声',
+ 'artists': ['杨超越'],
'view_count': int,
'like_count': int,
'repost_count': int,
@@ -943,20 +973,23 @@ class DouyinIE(TikTokBaseIE):
},
}, {
'url': 'https://www.douyin.com/video/6982497745948921092',
- 'md5': '34a87ebff3833357733da3fe17e37c0e',
+ 'md5': '15c5e660b7048af3707304e3cc02bbb5',
'info_dict': {
'id': '6982497745948921092',
'ext': 'mp4',
'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
+ 'uploader': '0731chaoyue',
'uploader_id': '408654318141572',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
- 'creator': '杨超越工作室',
- 'duration': 42479,
+ 'channel': '杨超越工作室',
+ 'creators': ['杨超越工作室'],
+ 'duration': 42,
'timestamp': 1625739481,
'upload_date': '20210708',
'track': '@杨超越工作室创作的原声',
+ 'artists': ['杨超越工作室'],
'view_count': int,
'like_count': int,
'repost_count': int,
@@ -965,20 +998,23 @@ class DouyinIE(TikTokBaseIE):
},
}, {
'url': 'https://www.douyin.com/video/6953975910773099811',
- 'md5': 'dde3302460f19db59c47060ff013b902',
+ 'md5': '0e6443758b8355db9a3c34864a4276be',
'info_dict': {
'id': '6953975910773099811',
'ext': 'mp4',
'title': '#一起看海 出现在你的夏日里',
'description': '#一起看海 出现在你的夏日里',
+ 'uploader': '6897520xka',
'uploader_id': '110403406559',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
- 'creator': '杨超越',
- 'duration': 17343,
+ 'channel': '杨超越',
+ 'creators': ['杨超越'],
+ 'duration': 17,
'timestamp': 1619098692,
'upload_date': '20210422',
'track': '@杨超越创作的原声',
+ 'artists': ['杨超越'],
'view_count': int,
'like_count': int,
'repost_count': int,
@@ -1004,20 +1040,23 @@ class DouyinIE(TikTokBaseIE):
'skip': 'No longer available',
}, {
'url': 'https://www.douyin.com/video/6963263655114722595',
- 'md5': 'cf9f11f0ec45d131445ec2f06766e122',
+ 'md5': '1440bcf59d8700f8e014da073a4dfea8',
'info_dict': {
'id': '6963263655114722595',
'ext': 'mp4',
'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
+ 'uploader': '6897520xka',
'uploader_id': '110403406559',
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
- 'creator': '杨超越',
- 'duration': 15115,
+ 'channel': '杨超越',
+ 'creators': ['杨超越'],
+ 'duration': 15,
'timestamp': 1621261163,
'upload_date': '20210517',
'track': '@杨超越创作的原声',
+ 'artists': ['杨超越'],
'view_count': int,
'like_count': int,
'repost_count': int,
@@ -1025,34 +1064,23 @@ class DouyinIE(TikTokBaseIE):
'thumbnail': r're:https?://.+\.jpe?g',
},
}]
- _APP_VERSIONS = [('23.3.0', '230300')]
- _APP_NAME = 'aweme'
- _AID = 1128
- _API_HOSTNAME = 'aweme.snssdk.com'
_UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s'
_WEBPAGE_HOST = 'https://www.douyin.com/'
def _real_extract(self, url):
video_id = self._match_id(url)
- try:
- return self._extract_aweme_app(video_id)
- except ExtractorError as e:
- e.expected = True
- self.to_screen(f'{e}; trying with webpage')
-
- webpage = self._download_webpage(url, video_id)
- render_data = self._search_json(
- r'