Merge branch 'master' into murrtube

pull/9249/head
bashonly 3 months ago committed by GitHub
commit 7b15ee0652
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -164,7 +164,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
yt-dlp yt-dlp
yt-dlp.tar.gz yt-dlp.tar.gz
@ -227,7 +227,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-linux_${{ matrix.architecture }} name: build-bin-linux_${{ matrix.architecture }}
path: | # run-on-arch-action designates armv7l as armv7 path: | # run-on-arch-action designates armv7l as armv7
repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
compression-level: 0 compression-level: 0
@ -271,7 +271,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos dist/yt-dlp_macos
dist/yt-dlp_macos.zip dist/yt-dlp_macos.zip
@ -324,7 +324,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_macos_legacy dist/yt-dlp_macos_legacy
compression-level: 0 compression-level: 0
@ -373,7 +373,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp.exe dist/yt-dlp.exe
dist/yt-dlp_min.exe dist/yt-dlp_min.exe
@ -421,7 +421,7 @@ jobs:
- name: Upload artifacts - name: Upload artifacts
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: build-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp_x86.exe dist/yt-dlp_x86.exe
compression-level: 0 compression-level: 0
@ -441,7 +441,7 @@ jobs:
- uses: actions/download-artifact@v4 - uses: actions/download-artifact@v4
with: with:
path: artifact path: artifact
pattern: build-* pattern: build-bin-*
merge-multiple: true merge-multiple: true
- name: Make SHA2-SUMS files - name: Make SHA2-SUMS files
@ -484,3 +484,4 @@ jobs:
_update_spec _update_spec
SHA*SUMS* SHA*SUMS*
compression-level: 0 compression-level: 0
overwrite: true

@ -167,8 +167,8 @@ For ease of use, a few more compat options are available:
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Same as `--compat-options prefer-legacy-http-handler,manifest-filesize-approx`. Use this to enable all future compat options * `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
# INSTALLATION # INSTALLATION
@ -218,7 +218,7 @@ Example usage:
yt-dlp --update-to nightly yt-dlp --update-to nightly
# To install nightly with pip: # To install nightly with pip:
python -m pip install -U --pre yt-dlp python -m pip install -U --pre yt-dlp[default]
``` ```
<!-- MANPAGE: BEGIN EXCLUDED SECTION --> <!-- MANPAGE: BEGIN EXCLUDED SECTION -->
@ -1310,8 +1310,11 @@ The available fields are:
- `description` (string): The description of the video - `description` (string): The description of the video
- `display_id` (string): An alternative identifier for the video - `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader - `uploader` (string): Full name of the video uploader
- `uploader_id` (string): Nickname or id of the video uploader
- `uploader_url` (string): URL to the video uploader's profile
- `license` (string): License name the video is licensed under - `license` (string): License name the video is licensed under
- `creator` (string): The creator of the video - `creators` (list): The creators of the video
- `creator` (string): The creators of the video; comma-separated
- `timestamp` (numeric): UNIX timestamp of the moment the video became available - `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD) - `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
@ -1319,9 +1322,9 @@ The available fields are:
- `release_year` (numeric): Year (YYYY) when the video or album was released - `release_year` (numeric): Year (YYYY) when the video or album was released
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified - `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC - `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- `uploader_id` (string): Nickname or id of the video uploader
- `channel` (string): Full name of the channel the video is uploaded on - `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel - `channel_id` (string): Id of the channel
- `channel_url` (string): URL of the channel
- `channel_follower_count` (numeric): Number of followers of the channel - `channel_follower_count` (numeric): Number of followers of the channel
- `channel_is_verified` (boolean): Whether the channel is verified on the platform - `channel_is_verified` (boolean): Whether the channel is verified on the platform
- `location` (string): Physical location where the video was filmed - `location` (string): Physical location where the video was filmed
@ -1361,7 +1364,10 @@ The available fields are:
- `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
- `categories` (list): List of categories the video belongs to
- `tags` (list): List of tags assigned to the video
- `cast` (list): List of cast members
All the fields in [Filtering Formats](#filtering-formats) can also be used All the fields in [Filtering Formats](#filtering-formats) can also be used
Available for the video that belongs to some logical chapter or section: Available for the video that belongs to some logical chapter or section:
@ -1373,6 +1379,7 @@ Available for the video that belongs to some logical chapter or section:
Available for the video that is an episode of some series or programme: Available for the video that is an episode of some series or programme:
- `series` (string): Title of the series or programme the video episode belongs to - `series` (string): Title of the series or programme the video episode belongs to
- `series_id` (string): Id of the series or programme the video episode belongs to
- `season` (string): Title of the season the video episode belongs to - `season` (string): Title of the season the video episode belongs to
- `season_number` (numeric): Number of the season the video episode belongs to - `season_number` (numeric): Number of the season the video episode belongs to
- `season_id` (string): Id of the season the video episode belongs to - `season_id` (string): Id of the season the video episode belongs to
@ -1385,11 +1392,16 @@ Available for the media that is a track or a part of a music album:
- `track` (string): Title of the track - `track` (string): Title of the track
- `track_number` (numeric): Number of the track within an album or a disc - `track_number` (numeric): Number of the track within an album or a disc
- `track_id` (string): Id of the track - `track_id` (string): Id of the track
- `artist` (string): Artist(s) of the track - `artists` (list): Artist(s) of the track
- `genre` (string): Genre(s) of the track - `artist` (string): Artist(s) of the track; comma-separated
- `genres` (list): Genre(s) of the track
- `genre` (string): Genre(s) of the track; comma-separated
- `composers` (list): Composer(s) of the piece
- `composer` (string): Composer(s) of the piece; comma-separated
- `album` (string): Title of the album the track belongs to - `album` (string): Title of the album the track belongs to
- `album_type` (string): Type of the album - `album_type` (string): Type of the album
- `album_artist` (string): List of all artists appeared on the album - `album_artists` (list): All artists appeared on the album
- `album_artist` (string): All artists appeared on the album; comma-separated
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters: Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:
@ -1767,10 +1779,11 @@ Metadata fields | From
`description`, `synopsis` | `description` `description`, `synopsis` | `description`
`purl`, `comment` | `webpage_url` `purl`, `comment` | `webpage_url`
`track` | `track_number` `track` | `track_number`
`artist` | `artist`, `creator`, `uploader` or `uploader_id` `artist` | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`genre` | `genre` `composer` | `composer` or `composers`
`genre` | `genre` or `genres`
`album` | `album` `album` | `album`
`album_artist` | `album_artist` `album_artist` | `album_artist` or `album_artists`
`disc` | `disc_number` `disc` | `disc_number`
`show` | `series` `show` | `series`
`season_number` | `season_number` `season_number` | `season_number`

@ -19,7 +19,7 @@ def parse_args():
parser.add_argument( parser.add_argument(
'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)') 'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
parser.add_argument( parser.add_argument(
'-e', '--exclude', metavar='REQUIREMENT', action='append', help='Exclude a required dependency') '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
parser.add_argument( parser.add_argument(
'-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group') '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
parser.add_argument( parser.add_argument(
@ -33,21 +33,28 @@ def parse_args():
def main(): def main():
args = parse_args() args = parse_args()
toml_data = parse_toml(read_file(args.input)) project_table = parse_toml(read_file(args.input))['project']
deps = toml_data['project']['dependencies'] optional_groups = project_table['optional-dependencies']
targets = deps.copy() if not args.only_optional else [] excludes = args.exclude or []
for exclude in args.exclude or []: deps = []
for dep in deps: if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
simplified_dep = re.match(r'[\w-]+', dep)[0] deps.extend(project_table['dependencies'])
if dep in targets and (exclude.lower() == simplified_dep.lower() or exclude == dep): if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
targets.remove(dep) deps.extend(optional_groups['default'])
optional_deps = toml_data['project']['optional-dependencies'] def name(dependency):
for include in args.include or []: return re.match(r'[\w-]+', dependency)[0].lower()
group = optional_deps.get(include)
if group: target_map = {name(dep): dep for dep in deps}
targets.extend(group)
for include in filter(None, map(optional_groups.get, args.include or [])):
target_map.update(zip(map(name, include), include))
for exclude in map(name, excludes):
target_map.pop(exclude, None)
targets = list(target_map.values())
if args.print: if args.print:
for target in targets: for target in targets:

@ -51,6 +51,7 @@ dependencies = [
] ]
[project.optional-dependencies] [project.optional-dependencies]
default = []
secretstorage = [ secretstorage = [
"cffi", "cffi",
"secretstorage", "secretstorage",

@ -223,6 +223,10 @@ def sanitize_got_info_dict(got_dict):
if test_info_dict.get('display_id') == test_info_dict.get('id'): if test_info_dict.get('display_id') == test_info_dict.get('id'):
test_info_dict.pop('display_id') test_info_dict.pop('display_id')
# Remove deprecated fields
for old in YoutubeDL._deprecated_multivalue_fields.keys():
test_info_dict.pop(old, None)
# release_year may be generated from release_date # release_year may be generated from release_date
if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])): if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
test_info_dict.pop('release_year') test_info_dict.pop('release_year')

@ -941,7 +941,7 @@ class TestYoutubeDL(unittest.TestCase):
def get_videos(filter_=None): def get_videos(filter_=None):
ydl = YDL({'match_filter': filter_, 'simulate': True}) ydl = YDL({'match_filter': filter_, 'simulate': True})
for v in videos: for v in videos:
ydl.process_ie_result(v, download=True) ydl.process_ie_result(v.copy(), download=True)
return [v['id'] for v in ydl.downloaded_info_dicts] return [v['id'] for v in ydl.downloaded_info_dicts]
res = get_videos() res = get_videos()

@ -192,8 +192,8 @@ class TestWebsSocketRequestHandlerConformance:
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True) @pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
@pytest.mark.parametrize('params,extensions', [ @pytest.mark.parametrize('params,extensions', [
({'timeout': 0.00001}, {}), ({'timeout': sys.float_info.min}, {}),
({}, {'timeout': 0.00001}), ({}, {'timeout': sys.float_info.min}),
]) ])
def test_timeout(self, handler, params, extensions): def test_timeout(self, handler, params, extensions):
with handler(**params) as rh: with handler(**params) as rh:

@ -580,6 +580,13 @@ class YoutubeDL:
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
} }
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
'artist': 'artists',
'composer': 'composers',
'creator': 'creators',
'genre': 'genres',
}
_format_selection_exts = { _format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio), 'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
@ -683,7 +690,6 @@ class YoutubeDL:
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers')) self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
self.params['http_headers'].pop('Cookie', None) self.params['http_headers'].pop('Cookie', None)
self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
if auto_init and auto_init != 'no_verbose_header': if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header() self.print_debug_header()
@ -957,6 +963,7 @@ class YoutubeDL:
def close(self): def close(self):
self.save_cookies() self.save_cookies()
self._request_director.close() self._request_director.close()
del self._request_director
def trouble(self, message=None, tb=None, is_error=True): def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears. """Determine action to take when a download problem appears.
@ -2640,6 +2647,14 @@ class YoutubeDL:
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
for old_key, new_key in self._deprecated_multivalue_fields.items():
if new_key in info_dict and old_key in info_dict:
self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
elif old_value := info_dict.get(old_key):
info_dict[new_key] = old_value.split(', ')
elif new_value := info_dict.get(new_key):
info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
def _raise_pending_errors(self, info): def _raise_pending_errors(self, info):
err = info.pop('__pending_error', None) err = info.pop('__pending_error', None)
if err: if err:
@ -3483,7 +3498,8 @@ class YoutubeDL:
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
'Possible MPEG-TS in MP4 container or malformed AAC timestamps', 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP) FFmpegFixupM3u8PP)
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments', ffmpeg_fixup(downloader == 'dashsegments'
and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
@ -4144,6 +4160,10 @@ class YoutubeDL:
director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0) director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
return director return director
@functools.cached_property
def _request_director(self):
return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
def encode(self, s): def encode(self, s):
if isinstance(s, bytes): if isinstance(s, bytes):
return s # Already encoded return s # Already encoded

@ -14,7 +14,7 @@ import os
import re import re
import traceback import traceback
from .compat import compat_shlex_quote from .compat import compat_os_name, compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -984,7 +984,28 @@ def _real_main(argv=None):
if pre_process: if pre_process:
return ydl._download_retcode return ydl._download_retcode
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) args = sys.argv[1:] if argv is None else argv
ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes
import msvcrt
kernel32 = ctypes.WinDLL('Kernel32')
buffer = (1 * ctypes.wintypes.DWORD)()
attached_processes = kernel32.GetConsoleProcessList(buffer, 1)
# If we only have a single process attached, then the executable was double clicked
# When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or is_onefile and attached_processes == 2:
print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: '
'https://github.com/yt-dlp/yt-dlp#readme'))
msvcrt.getch()
_exit(2)
parser.error( parser.error(
'You must provide at least one URL.\n' 'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.') 'Type yt-dlp --help to see a list of all options.')

@ -320,7 +320,6 @@ from .cbs import (
CBSIE, CBSIE,
ParamountPressExpressIE, ParamountPressExpressIE,
) )
from .cbsinteractive import CBSInteractiveIE
from .cbsnews import ( from .cbsnews import (
CBSNewsEmbedIE, CBSNewsEmbedIE,
CBSNewsIE, CBSNewsIE,
@ -348,10 +347,6 @@ from .cgtn import CGTNIE
from .charlierose import CharlieRoseIE from .charlierose import CharlieRoseIE
from .chaturbate import ChaturbateIE from .chaturbate import ChaturbateIE
from .chilloutzone import ChilloutzoneIE from .chilloutzone import ChilloutzoneIE
from .chingari import (
ChingariIE,
ChingariUserIE,
)
from .chzzk import ( from .chzzk import (
CHZZKLiveIE, CHZZKLiveIE,
CHZZKVideoIE, CHZZKVideoIE,
@ -369,7 +364,6 @@ from .ciscolive import (
from .ciscowebex import CiscoWebexIE from .ciscowebex import CiscoWebexIE
from .cjsw import CJSWIE from .cjsw import CJSWIE
from .clipchamp import ClipchampIE from .clipchamp import ClipchampIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE from .clippit import ClippitIE
from .cliprs import ClipRsIE from .cliprs import ClipRsIE
from .closertotruth import CloserToTruthIE from .closertotruth import CloserToTruthIE
@ -379,7 +373,6 @@ from .clubic import ClubicIE
from .clyp import ClypIE from .clyp import ClypIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnbc import ( from .cnbc import (
CNBCIE,
CNBCVideoIE, CNBCVideoIE,
) )
from .cnn import ( from .cnn import (
@ -445,6 +438,7 @@ from .dailymail import DailyMailIE
from .dailymotion import ( from .dailymotion import (
DailymotionIE, DailymotionIE,
DailymotionPlaylistIE, DailymotionPlaylistIE,
DailymotionSearchIE,
DailymotionUserIE, DailymotionUserIE,
) )
from .dailywire import ( from .dailywire import (
@ -476,7 +470,6 @@ from .dlf import (
) )
from .dfb import DFBIE from .dfb import DFBIE
from .dhm import DHMIE from .dhm import DHMIE
from .digg import DiggIE
from .douyutv import ( from .douyutv import (
DouyuShowIE, DouyuShowIE,
DouyuTVIE, DouyuTVIE,
@ -610,7 +603,6 @@ from .fc2 import (
) )
from .fczenit import FczenitIE from .fczenit import FczenitIE
from .fifa import FifaIE from .fifa import FifaIE
from .filmmodu import FilmmoduIE
from .filmon import ( from .filmon import (
FilmOnIE, FilmOnIE,
FilmOnChannelIE, FilmOnChannelIE,
@ -676,7 +668,6 @@ from .gab import (
GabIE, GabIE,
) )
from .gaia import GaiaIE from .gaia import GaiaIE
from .gameinformer import GameInformerIE
from .gamejolt import ( from .gamejolt import (
GameJoltIE, GameJoltIE,
GameJoltUserIE, GameJoltUserIE,
@ -705,7 +696,6 @@ from .gettr import (
GettrStreamingIE, GettrStreamingIE,
) )
from .giantbomb import GiantBombIE from .giantbomb import GiantBombIE
from .giga import GigaIE
from .glide import GlideIE from .glide import GlideIE
from .globalplayer import ( from .globalplayer import (
GlobalPlayerLiveIE, GlobalPlayerLiveIE,
@ -896,10 +886,8 @@ from .jtbc import (
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .kakao import KakaoIE from .kakao import KakaoIE
from .kaltura import KalturaIE from .kaltura import KalturaIE
from .kanal2 import Kanal2IE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .karrierevideos import KarriereVideosIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
@ -915,13 +903,11 @@ from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE from .kommunetv import KommunetvIE
from .kompas import KompasVideoIE from .kompas import KompasVideoIE
from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE from .koo import KooIE
from .kth import KTHIE from .kth import KTHIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kukululive import KukuluLiveIE from .kukululive import KukuluLiveIE
from .kusi import KUSIIE
from .kuwo import ( from .kuwo import (
KuwoIE, KuwoIE,
KuwoAlbumIE, KuwoAlbumIE,
@ -1003,7 +989,6 @@ from .lnkgo import (
LnkGoIE, LnkGoIE,
LnkIE, LnkIE,
) )
from .localnews8 import LocalNews8IE
from .lovehomeporn import LoveHomePornIE from .lovehomeporn import LoveHomePornIE
from .lrt import ( from .lrt import (
LRTVODIE, LRTVODIE,
@ -1030,7 +1015,6 @@ from .mailru import (
MailRuMusicSearchIE, MailRuMusicSearchIE,
) )
from .mainstreaming import MainStreamingIE from .mainstreaming import MainStreamingIE
from .malltv import MallTVIE
from .mangomolo import ( from .mangomolo import (
MangomoloVideoIE, MangomoloVideoIE,
MangomoloLiveIE, MangomoloLiveIE,
@ -1074,7 +1058,6 @@ from .meipai import MeipaiIE
from .melonvod import MelonVODIE from .melonvod import MelonVODIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mgtv import MGTVIE from .mgtv import MGTVIE
from .miaopai import MiaoPaiIE
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import ( from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyIE,
@ -1092,7 +1075,6 @@ from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
) )
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE from .minoto import MinotoIE
from .mirrativ import ( from .mirrativ import (
MirrativIE, MirrativIE,
@ -1120,7 +1102,6 @@ from .mlssoccer import MLSSoccerIE
from .mocha import MochaVideoIE from .mocha import MochaVideoIE
from .mojvideo import MojvideoIE from .mojvideo import MojvideoIE
from .monstercat import MonstercatIE from .monstercat import MonstercatIE
from .morningstar import MorningstarIE
from .motherless import ( from .motherless import (
MotherlessIE, MotherlessIE,
MotherlessGroupIE, MotherlessGroupIE,
@ -1365,7 +1346,6 @@ from .nuvid import NuvidIE
from .nzherald import NZHeraldIE from .nzherald import NZHeraldIE
from .nzonscreen import NZOnScreenIE from .nzonscreen import NZOnScreenIE
from .nzz import NZZIE from .nzz import NZZIE
from .odatv import OdaTVIE
from .odkmedia import OnDemandChinaEpisodeIE from .odkmedia import OnDemandChinaEpisodeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .oftv import ( from .oftv import (
@ -1477,7 +1457,6 @@ from .platzi import (
PlatziCourseIE, PlatziCourseIE,
) )
from .playplustv import PlayPlusTVIE from .playplustv import PlayPlusTVIE
from .playstuff import PlayStuffIE
from .playsuisse import PlaySuisseIE from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE from .playtvak import PlaytvakIE
from .playwire import PlaywireIE from .playwire import PlaywireIE
@ -1599,7 +1578,6 @@ from .raywenderlich import (
RayWenderlichIE, RayWenderlichIE,
RayWenderlichCourseIE, RayWenderlichCourseIE,
) )
from .rbmaradio import RBMARadioIE
from .rbgtum import ( from .rbgtum import (
RbgTumIE, RbgTumIE,
RbgTumCourseIE, RbgTumCourseIE,
@ -1631,7 +1609,6 @@ from .redgifs import (
RedGifsUserIE, RedGifsUserIE,
) )
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .rentv import ( from .rentv import (
RENTVIE, RENTVIE,
RENTVArticleIE, RENTVArticleIE,
@ -1640,6 +1617,7 @@ from .restudy import RestudyIE
from .reuters import ReutersIE from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .rheinmaintv import RheinMainTVIE from .rheinmaintv import RheinMainTVIE
from .ridehome import RideHomeIE
from .rinsefm import ( from .rinsefm import (
RinseFMIE, RinseFMIE,
RinseFMArtistPlaylistIE, RinseFMArtistPlaylistIE,
@ -1738,7 +1716,6 @@ from .safari import (
from .saitosan import SaitosanIE from .saitosan import SaitosanIE
from .samplefocus import SampleFocusIE from .samplefocus import SampleFocusIE
from .sapo import SapoIE from .sapo import SapoIE
from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .sbscokr import ( from .sbscokr import (
SBSCoKrIE, SBSCoKrIE,
@ -1758,7 +1735,6 @@ from .scte import (
SCTECourseIE, SCTECourseIE,
) )
from .scrolller import ScrolllerIE from .scrolller import ScrolllerIE
from .seeker import SeekerIE
from .sejmpl import SejmIE from .sejmpl import SejmIE
from .senalcolombia import SenalColombiaLiveIE from .senalcolombia import SenalColombiaLiveIE
from .senategov import SenateISVPIE, SenateGovIE from .senategov import SenateISVPIE, SenateGovIE
@ -1901,7 +1877,6 @@ from .storyfire import (
) )
from .streamable import StreamableIE from .streamable import StreamableIE
from .streamcz import StreamCZIE from .streamcz import StreamCZIE
from .streamff import StreamFFIE
from .streetvoice import StreetVoiceIE from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE from .stretchinternet import StretchInternetIE
from .stripchat import StripchatIE from .stripchat import StripchatIE
@ -1930,7 +1905,6 @@ from .tbsjp import (
TBSJPProgramIE, TBSJPProgramIE,
TBSJPPlaylistIE, TBSJPPlaylistIE,
) )
from .tdslifeway import TDSLifewayIE
from .teachable import ( from .teachable import (
TeachableIE, TeachableIE,
TeachableCourseIE, TeachableCourseIE,
@ -2500,6 +2474,7 @@ from .zee5 import (
Zee5SeriesIE, Zee5SeriesIE,
) )
from .zeenews import ZeeNewsIE from .zeenews import ZeeNewsIE
from .zenporn import ZenPornIE
from .zetland import ZetlandDKArticleIE from .zetland import ZetlandDKArticleIE
from .zhihu import ZhihuIE from .zhihu import ZhihuIE
from .zingmp3 import ( from .zingmp3 import (

@ -245,7 +245,6 @@ class ABCIViewIE(InfoExtractor):
'episode_id': 'NC2203H039S00', 'episode_id': 'NC2203H039S00',
'season_number': 2022, 'season_number': 2022,
'season': 'Season 2022', 'season': 'Season 2022',
'episode_number': None,
'episode': 'Locking Up Kids', 'episode': 'Locking Up Kids',
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg', 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/nc/NC2203H039S00_636d8a0944a22_1920.jpg',
'timestamp': 1668460497, 'timestamp': 1668460497,
@ -271,8 +270,6 @@ class ABCIViewIE(InfoExtractor):
'episode_id': 'RF2004Q043S00', 'episode_id': 'RF2004Q043S00',
'season_number': 2021, 'season_number': 2021,
'season': 'Season 2021', 'season': 'Season 2021',
'episode_number': None,
'episode': None,
'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg', 'thumbnail': 'https://cdn.iview.abc.net.au/thumbs/i/rf/RF2004Q043S00_61a950639dbc0_1920.jpg',
'timestamp': 1638710705, 'timestamp': 1638710705,

@ -259,7 +259,7 @@ class AbemaTVIE(AbemaTVBaseIE):
'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】', 'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON', 'series': 'ゆるキャン△ SEASON',
'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】', 'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series_number': 2, 'season_number': 2,
'episode_number': 1, 'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17', 'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
}, },

@ -3,6 +3,7 @@ from ..utils import (
float_or_none, float_or_none,
format_field, format_field,
int_or_none, int_or_none,
str_or_none,
traverse_obj, traverse_obj,
parse_codecs, parse_codecs,
parse_qs, parse_qs,
@ -129,7 +130,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '红孩儿之趴趴蛙寻石记 第5话 ', 'title': '红孩儿之趴趴蛙寻石记 第5话 ',
'duration': 760.0, 'duration': 760.0,
'season': '红孩儿之趴趴蛙寻石记', 'season': '红孩儿之趴趴蛙寻石记',
'season_id': 5023171, 'season_id': '5023171',
'season_number': 1, # series has only 1 season 'season_number': 1, # series has only 1 season
'episode': 'Episode 5', 'episode': 'Episode 5',
'episode_number': 5, 'episode_number': 5,
@ -146,7 +147,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': '叽歪老表(第二季) 第5话 坚不可摧', 'title': '叽歪老表(第二季) 第5话 坚不可摧',
'season': '叽歪老表(第二季)', 'season': '叽歪老表(第二季)',
'season_number': 2, 'season_number': 2,
'season_id': 6065485, 'season_id': '6065485',
'episode': '坚不可摧', 'episode': '坚不可摧',
'episode_number': 5, 'episode_number': 5,
'upload_date': '20220324', 'upload_date': '20220324',
@ -191,7 +192,7 @@ class AcFunBangumiIE(AcFunVideoBaseIE):
'title': json_bangumi_data.get('showTitle'), 'title': json_bangumi_data.get('showTitle'),
'thumbnail': json_bangumi_data.get('image'), 'thumbnail': json_bangumi_data.get('image'),
'season': json_bangumi_data.get('bangumiTitle'), 'season': json_bangumi_data.get('bangumiTitle'),
'season_id': season_id, 'season_id': str_or_none(season_id),
'season_number': season_number, 'season_number': season_number,
'episode': json_bangumi_data.get('title'), 'episode': json_bangumi_data.get('title'),
'episode_number': episode_number, 'episode_number': episode_number,

@ -22,7 +22,7 @@ class AltCensoredIE(InfoExtractor):
'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?", 'title': "QUELLES SONT LES CONSÉQUENCES DE L'HYPERSEXUALISATION DE LA SOCIÉTÉ ?",
'display_id': 'k0srjLSkga8.webm', 'display_id': 'k0srjLSkga8.webm',
'release_date': '20180403', 'release_date': '20180403',
'creator': 'Virginie Vota', 'creators': ['Virginie Vota'],
'release_year': 2018, 'release_year': 2018,
'upload_date': '20230318', 'upload_date': '20230318',
'uploader': 'admin@altcensored.com', 'uploader': 'admin@altcensored.com',
@ -32,7 +32,7 @@ class AltCensoredIE(InfoExtractor):
'duration': 926.09, 'duration': 926.09,
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', 'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int, 'view_count': int,
'categories': ['News & Politics'], 'categories': ['News & Politics'], # FIXME
} }
}] }]
@ -62,14 +62,21 @@ class AltCensoredChannelIE(InfoExtractor):
'title': 'Virginie Vota', 'title': 'Virginie Vota',
'id': 'UCFPTO55xxHqFqkzRZHu4kcw', 'id': 'UCFPTO55xxHqFqkzRZHu4kcw',
}, },
'playlist_count': 91 'playlist_count': 85,
}, { }, {
'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw', 'url': 'https://altcensored.com/channel/UC9CcJ96HKMWn0LZlcxlpFTw',
'info_dict': { 'info_dict': {
'title': 'yukikaze775', 'title': 'yukikaze775',
'id': 'UC9CcJ96HKMWn0LZlcxlpFTw', 'id': 'UC9CcJ96HKMWn0LZlcxlpFTw',
}, },
'playlist_count': 4 'playlist_count': 4,
}, {
'url': 'https://altcensored.com/channel/UCfYbb7nga6-icsFWWgS-kWw',
'info_dict': {
'title': 'Mister Metokur',
'id': 'UCfYbb7nga6-icsFWWgS-kWw',
},
'playlist_count': 121,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -78,7 +85,7 @@ class AltCensoredChannelIE(InfoExtractor):
url, channel_id, 'Download channel webpage', 'Unable to get channel webpage') url, channel_id, 'Download channel webpage', 'Unable to get channel webpage')
title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False) title = self._html_search_meta('altcen_title', webpage, 'title', fatal=False)
page_count = int_or_none(self._html_search_regex( page_count = int_or_none(self._html_search_regex(
r'<a[^>]+href="/channel/\w+/page/(\d+)">(?:\1)</a>', r'<a[^>]+href="/channel/[\w-]+/page/(\d+)">(?:\1)</a>',
webpage, 'page count', default='1')) webpage, 'page count', default='1'))
def page_func(page_num): def page_func(page_num):

@ -31,6 +31,7 @@ from ..utils import (
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
variadic,
) )
@ -49,7 +50,7 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19681210', 'release_date': '19681210',
'timestamp': 1268695290, 'timestamp': 1268695290,
'upload_date': '20100315', 'upload_date': '20100315',
'creator': 'SRI International', 'creators': ['SRI International'],
'uploader': 'laura@archive.org', 'uploader': 'laura@archive.org',
'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr', 'display_id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.cdr',
@ -109,7 +110,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Turning', 'title': 'Turning',
'ext': 'flac', 'ext': 'flac',
'track': 'Turning', 'track': 'Turning',
'creator': 'Grateful Dead', 'creators': ['Grateful Dead'],
'display_id': 'gd1977-05-08d01t01.flac', 'display_id': 'gd1977-05-08d01t01.flac',
'track_number': 1, 'track_number': 1,
'album': '1977-05-08 - Barton Hall - Cornell University', 'album': '1977-05-08 - Barton Hall - Cornell University',
@ -129,7 +130,7 @@ class ArchiveOrgIE(InfoExtractor):
'location': 'Barton Hall - Cornell University', 'location': 'Barton Hall - Cornell University',
'duration': 438.68, 'duration': 438.68,
'track': 'Deal', 'track': 'Deal',
'creator': 'Grateful Dead', 'creators': ['Grateful Dead'],
'album': '1977-05-08 - Barton Hall - Cornell University', 'album': '1977-05-08 - Barton Hall - Cornell University',
'release_date': '19770508', 'release_date': '19770508',
'display_id': 'gd1977-05-08d01t07.flac', 'display_id': 'gd1977-05-08d01t07.flac',
@ -167,7 +168,7 @@ class ArchiveOrgIE(InfoExtractor):
'upload_date': '20160610', 'upload_date': '20160610',
'description': 'md5:f70956a156645a658a0dc9513d9e78b7', 'description': 'md5:f70956a156645a658a0dc9513d9e78b7',
'uploader': 'dimitrios@archive.org', 'uploader': 'dimitrios@archive.org',
'creator': ['British Broadcasting Corporation', 'Time-Life Films'], 'creators': ['British Broadcasting Corporation', 'Time-Life Films'],
'timestamp': 1465594947, 'timestamp': 1465594947,
}, },
'playlist': [ 'playlist': [
@ -257,7 +258,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': m['title'], 'title': m['title'],
'description': clean_html(m.get('description')), 'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']), 'uploader': dict_get(m, ['uploader', 'adder']),
'creator': m.get('creator'), 'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'), 'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')), 'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])), 'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
@ -272,7 +273,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': f.get('title') or f['name'], 'title': f.get('title') or f['name'],
'display_id': f['name'], 'display_id': f['name'],
'description': clean_html(f.get('description')), 'description': clean_html(f.get('description')),
'creator': f.get('creator'), 'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'duration': parse_duration(f.get('length')), 'duration': parse_duration(f.get('length')),
'track_number': int_or_none(f.get('track')), 'track_number': int_or_none(f.get('track')),
'album': f.get('album'), 'album': f.get('album'),
@ -300,7 +301,7 @@ class ArchiveOrgIE(InfoExtractor):
is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig')) is_logged_in = bool(self._get_cookies('https://archive.org').get('logged-in-sig'))
if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in): if extension in KNOWN_EXTENSIONS and (not f.get('private') or is_logged_in):
entry['formats'].append({ entry['formats'].append({
'url': 'https://archive.org/download/' + identifier + '/' + f['name'], 'url': 'https://archive.org/download/' + identifier + '/' + urllib.parse.quote(f['name']),
'format': f.get('format'), 'format': f.get('format'),
'width': int_or_none(f.get('width')), 'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')), 'height': int_or_none(f.get('height')),

@ -24,7 +24,8 @@ class AxsIE(InfoExtractor):
'timestamp': 1685729564, 'timestamp': 1685729564,
'duration': 1284.216, 'duration': 1284.216,
'series': 'Rock & Roll Road Trip with Sammy Hagar', 'series': 'Rock & Roll Road Trip with Sammy Hagar',
'season': 2, 'season': 'Season 2',
'season_number': 2,
'episode': '3', 'episode': '3',
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394', 'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
}, },
@ -41,7 +42,8 @@ class AxsIE(InfoExtractor):
'timestamp': 1676403615, 'timestamp': 1676403615,
'duration': 2570.668, 'duration': 2570.668,
'series': 'The Big Interview with Dan Rather', 'series': 'The Big Interview with Dan Rather',
'season': 3, 'season': 'Season 3',
'season_number': 3,
'episode': '5', 'episode': '5',
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32', 'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
}, },
@ -77,7 +79,7 @@ class AxsIE(InfoExtractor):
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {str}), 'description': ('description', {str}),
'series': ('seriestitle', {str}), 'series': ('seriestitle', {str}),
'season': ('season', {int}), 'season_number': ('season', {int}),
'episode': ('episode', {str}), 'episode': ('episode', {str}),
'duration': ('duration', {float_or_none}), 'duration': ('duration', {float_or_none}),
'timestamp': ('updated_at', {parse_iso8601}), 'timestamp': ('updated_at', {parse_iso8601}),

@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -22,7 +23,7 @@ class BeegIE(InfoExtractor):
'age_limit': 18, 'age_limit': 18,
'upload_date': '20220131', 'upload_date': '20220131',
'timestamp': 1643656455, 'timestamp': 1643656455,
'display_id': 2540839, 'display_id': '2540839',
} }
}, { }, {
'url': 'https://beeg.com/-0599050563103750?t=4-861', 'url': 'https://beeg.com/-0599050563103750?t=4-861',
@ -36,7 +37,7 @@ class BeegIE(InfoExtractor):
'age_limit': 18, 'age_limit': 18,
'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9', 'description': 'md5:b4fc879a58ae6c604f8f259155b7e3b9',
'timestamp': 1643623200, 'timestamp': 1643623200,
'display_id': 2569965, 'display_id': '2569965',
'upload_date': '20220131', 'upload_date': '20220131',
} }
}, { }, {
@ -78,7 +79,7 @@ class BeegIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'display_id': first_fact.get('id'), 'display_id': str_or_none(first_fact.get('id')),
'title': traverse_obj(video, ('file', 'stuff', 'sf_name')), 'title': traverse_obj(video, ('file', 'stuff', 'sf_name')),
'description': traverse_obj(video, ('file', 'stuff', 'sf_story')), 'description': traverse_obj(video, ('file', 'stuff', 'sf_story')),
'timestamp': unified_timestamp(first_fact.get('fc_created')), 'timestamp': unified_timestamp(first_fact.get('fc_created')),

@ -32,7 +32,7 @@ class BellMediaIE(InfoExtractor):
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525', 'upload_date': '20180525',
'timestamp': 1527288600, 'timestamp': 1527288600,
'season_id': 73997, 'season_id': '73997',
'season': '2018', 'season': '2018',
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg', 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
'tags': [], 'tags': [],

@ -93,7 +93,6 @@ class BFMTVArticleIE(BFMTVBaseIE):
'id': '6318445464112', 'id': '6318445464112',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe', 'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe',
'description': None,
'uploader_id': '876630703001', 'uploader_id': '876630703001',
'upload_date': '20230110', 'upload_date': '20230110',
'timestamp': 1673341692, 'timestamp': 1673341692,

@ -1996,7 +1996,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'title': get_element_by_class( 'title': get_element_by_class(
'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage), 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
'description': get_element_by_class( 'description': get_element_by_class(
'bstar-meta__desc', webpage) or self._html_search_meta('og:description'), 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
}, self._search_json_ld(webpage, video_id, default={})) }, self._search_json_ld(webpage, video_id, default={}))
def _get_comments_reply(self, root_id, next_id=0, display_id=None): def _get_comments_reply(self, root_id, next_id=0, display_id=None):

@ -185,7 +185,6 @@ class BitChuteChannelIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'UGlrF9o9b-Q', 'id': 'UGlrF9o9b-Q',
'ext': 'mp4', 'ext': 'mp4',
'filesize': None,
'title': 'This is the first video on #BitChute !', 'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034', 'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',

@ -4,10 +4,12 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
str_or_none,
) )
class BleacherReportIE(InfoExtractor): class BleacherReportIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football', 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
@ -16,7 +18,7 @@ class BleacherReportIE(InfoExtractor):
'id': '2496438', 'id': '2496438',
'ext': 'mp4', 'ext': 'mp4',
'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?', 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
'uploader_id': 3992341, 'uploader_id': '3992341',
'description': 'CFB, ACC, Florida State', 'description': 'CFB, ACC, Florida State',
'timestamp': 1434380212, 'timestamp': 1434380212,
'upload_date': '20150615', 'upload_date': '20150615',
@ -33,7 +35,7 @@ class BleacherReportIE(InfoExtractor):
'timestamp': 1446839961, 'timestamp': 1446839961,
'uploader': 'Sean Fay', 'uploader': 'Sean Fay',
'description': 'md5:b1601e2314c4d8eec23b6eafe086a757', 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
'uploader_id': 6466954, 'uploader_id': '6466954',
'upload_date': '20151011', 'upload_date': '20151011',
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
@ -58,7 +60,7 @@ class BleacherReportIE(InfoExtractor):
'id': article_id, 'id': article_id,
'title': article_data['title'], 'title': article_data['title'],
'uploader': article_data.get('author', {}).get('name'), 'uploader': article_data.get('author', {}).get('name'),
'uploader_id': article_data.get('authorId'), 'uploader_id': str_or_none(article_data.get('authorId')),
'timestamp': parse_iso8601(article_data.get('createdAt')), 'timestamp': parse_iso8601(article_data.get('createdAt')),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'comment_count': int_or_none(article_data.get('commentsCount')), 'comment_count': int_or_none(article_data.get('commentsCount')),
@ -82,6 +84,7 @@ class BleacherReportIE(InfoExtractor):
class BleacherReportCMSIE(AMPIE): class BleacherReportCMSIE(AMPIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})' _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
_TESTS = [{ _TESTS = [{
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms', 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',

@ -76,6 +76,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE
class CBSIE(CBSBaseIE): class CBSIE(CBSBaseIE):
_WORKING = False
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
cbs:| cbs:|

@ -1,98 +0,0 @@
from .cbs import CBSIE
from ..utils import int_or_none
class CBSInteractiveIE(CBSIE):  # XXX: Do not subclass from concrete IE
    # Handles cnet.com and zdnet.com video pages: the player options embedded
    # in the page supply the MPX reference id, which is then resolved through
    # the inherited `_extract_video_info`.
    _VALID_URL = r'https?://(?:www\.)?(?P<site>cnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P<id>[^/?]+)'
    _TESTS = [{
        'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/',
        'info_dict': {
            'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00',
            'display_id': 'hands-on-with-microsofts-windows-8-1-update',
            'ext': 'mp4',
            'title': 'Hands-on with Microsoft Windows 8.1 Update',
            'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.',
            'uploader_id': '6085384d-619e-11e3-b231-14feb5ca9861',
            'uploader': 'Sarah Mitroff',
            'duration': 70,
            'timestamp': 1396479627,
            'upload_date': '20140402',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/',
        'md5': 'f11d27b2fa18597fbf92444d2a9ed386',
        'info_dict': {
            'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK',
            'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187',
            'ext': 'mp4',
            'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)',
            'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f',
            'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40',
            'uploader': 'Ashley Esqueda',
            'duration': 1482,
            'timestamp': 1433289889,
            'upload_date': '20150603',
        },
    }, {
        'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/',
        'info_dict': {
            'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt',
            'display_id': 'video-keeping-android-smartphones-and-tablets-secure',
            'ext': 'mp4',
            'title': 'Video: Keeping Android smartphones and tablets secure',
            'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.',
            'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0',
            'uploader': 'Adrian Kingsley-Hughes',
            'duration': 731,
            'timestamp': 1449129925,
            'upload_date': '20151203',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/',
        'only_matching': True,
    }]

    # Account number passed to `_extract_video_info`, keyed by the `site`
    # group captured by `_VALID_URL`.
    MPX_ACCOUNTS = {
        'cnet': 2198311517,
        'zdnet': 2387448114,
    }

    def _real_extract(self, url):
        """Extract a single CNET/ZDNet video.

        Reads the player options JSON from the page, picks the video entry
        (`video`, else the first of `videos`/`playlist`), and overlays the
        page-level metadata on top of what `_extract_video_info` returns.
        """
        site, display_id = self._match_valid_url(url).groups()
        page = self._download_webpage(url, display_id)

        options = self._parse_json(
            self._html_search_regex(
                r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'",
                page, 'data json'),
            display_id)

        vdata = options.get('video')
        if not vdata:
            # No single video entry: fall back to the head of the list
            vdata = (options.get('videos') or options.get('playlist'))[0]

        video_id = vdata['mpxRefId']
        title = vdata['title']

        uploader = uploader_id = None
        author = vdata.get('author')
        if author:
            uploader = '%s %s' % (author['firstName'], author['lastName'])
            uploader_id = author.get('id')

        info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site])
        overrides = {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'duration': int_or_none(vdata.get('duration')),
            'uploader': uploader,
            'uploader_id': uploader_id,
        }
        info.update(overrides)
        return info

@ -8,6 +8,7 @@ from ..utils import (
# class CBSSportsEmbedIE(CBSBaseIE): # class CBSSportsEmbedIE(CBSBaseIE):
class CBSSportsEmbedIE(InfoExtractor): class CBSSportsEmbedIE(InfoExtractor):
_WORKING = False
IE_NAME = 'cbssports:embed' IE_NAME = 'cbssports:embed'
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+? _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
(?: (?:
@ -75,6 +76,7 @@ class CBSSportsBaseIE(InfoExtractor):
class CBSSportsIE(CBSSportsBaseIE): class CBSSportsIE(CBSSportsBaseIE):
_WORKING = False
IE_NAME = 'cbssports' IE_NAME = 'cbssports'
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
@ -92,6 +94,7 @@ class CBSSportsIE(CBSSportsBaseIE):
class TwentyFourSevenSportsIE(CBSSportsBaseIE): class TwentyFourSevenSportsIE(CBSSportsBaseIE):
_WORKING = False
IE_NAME = '247sports' IE_NAME = '247sports'
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
_TESTS = [{ _TESTS = [{

@ -88,6 +88,20 @@ class CCTVIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# videoCenterId: "id"
'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
'info_dict': {
'id': '5c846c0518444308ba32c4159df3b3e0',
'ext': 'mp4',
'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集风物长宜放眼量',
'uploader': 'yangjuan',
'timestamp': 1708554940,
'upload_date': '20240221',
},
'params': {
'skip_download': True,
},
}, { }, {
# var ids = ["id"] # var ids = ["id"]
'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml', 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
@ -128,7 +142,7 @@ class CCTVIE(InfoExtractor):
video_id = self._search_regex( video_id = self._search_regex(
[r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
r'changePlayer\s*\(\s*["\']([\da-fA-F]+)', r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)', r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)', r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',

@ -51,7 +51,7 @@ class CeskaTelevizeIE(InfoExtractor):
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
'only_matching': True, 'only_matching': True,
'info_dict': { 'info_dict': {
'id': 402, 'id': '402',
'ext': 'mp4', 'ext': 'mp4',
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True, 'is_live': True,

@ -17,6 +17,7 @@ class CGTNIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1615295940, 'timestamp': 1615295940,
'upload_date': '20210309', 'upload_date': '20210309',
'categories': ['Video'],
}, },
'params': { 'params': {
'skip_download': True 'skip_download': True
@ -29,8 +30,8 @@ class CGTNIE(InfoExtractor):
'title': 'China, Indonesia vow to further deepen maritime cooperation', 'title': 'China, Indonesia vow to further deepen maritime cooperation',
'thumbnail': r're:^https?://.*\.png$', 'thumbnail': r're:^https?://.*\.png$',
'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.', 'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
'author': 'CGTN', 'creators': ['CGTN'],
'category': 'China', 'categories': ['China'],
'timestamp': 1622950200, 'timestamp': 1622950200,
'upload_date': '20210606', 'upload_date': '20210606',
}, },
@ -45,7 +46,12 @@ class CGTNIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url') download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
datetime_str = self._html_search_regex(r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False) datetime_str = self._html_search_regex(
r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
category = self._html_search_regex(
r'<span class="section">\s*(.+?)\s*</span>', webpage, 'category', fatal=False)
author = self._search_regex(
r'<div class="news-author-name">\s*(.+?)\s*</div>', webpage, 'author', default=None)
return { return {
'id': video_id, 'id': video_id,
@ -53,9 +59,7 @@ class CGTNIE(InfoExtractor):
'description': self._og_search_description(webpage, default=None), 'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'), 'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
'category': self._html_search_regex(r'<span class="section">\s*(.+?)\s*</span>', 'categories': [category] if category else None,
webpage, 'category', fatal=False), 'creators': [author] if author else None,
'author': self._html_search_regex(r'<div class="news-author-name">\s*(.+?)\s*</div>',
webpage, 'author', default=None, fatal=False),
'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600), 'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
} }

@ -1,207 +0,0 @@
import itertools
import json
import urllib.parse
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
str_to_int,
url_or_none,
)
class ChingariBaseIE(InfoExtractor):
    # Shared base for the Chingari extractors; converts one post object from
    # the API into an info dict.

    def _get_post(self, id, post_data):
        """Build the info dict for a single Chingari post.

        `id` is the post id used as the result's `id`; `post_data` is the
        post object as returned by the API.

        Raises KeyError when `post_data` has no `mediaLocation` or that
        object has no `base`.
        """
        media_data = post_data['mediaLocation']
        base_url = media_data['base']
        # `or {}` (instead of a `.get` default) also covers keys that are
        # present but explicitly null
        author_data = post_data.get('authorData') or {}
        song_data = post_data.get('song') or {}  # revisit this in future for differentiating b/w 'art' and 'author'

        formats = [{
            'format_id': frmt,
            # the part of the key after its first character is parsed as the width
            'width': str_to_int(frmt[1:]),
            'url': base_url + frmt_path,
        } for frmt, frmt_path in (media_data.get('transcoded') or {}).items()]

        if media_data.get('path'):
            formats.append({
                'format_id': 'original',
                'format_note': 'Direct video.',
                'url': base_url + '/apipublic' + media_data['path'],
                'quality': 10,
            })

        # `int_or_none` already passes None through, so no separate guard is
        # needed before scaling by 1000
        timestamp = int_or_none(str_to_int(post_data.get('created_at')), 1000)

        thumbnail, uploader_url = None, None
        if media_data.get('thumbnail'):
            thumbnail = base_url + media_data.get('thumbnail')
        if author_data.get('username'):
            uploader_url = 'https://chingari.io/' + author_data.get('username')

        # The caption is URL-encoded. A post without one must not crash the
        # extraction, so only decode when there is something to decode.
        caption = clean_html(post_data.get('caption'))
        if caption is not None:
            caption = urllib.parse.unquote_plus(caption)

        return {
            'id': id,
            'extractor_key': ChingariIE.ie_key(),
            'extractor': 'Chingari',
            'title': caption,
            'description': caption,
            'duration': media_data.get('duration'),
            'thumbnail': url_or_none(thumbnail),
            'like_count': post_data.get('likeCount'),
            'view_count': post_data.get('viewsCount'),
            'comment_count': post_data.get('commentCount'),
            'repost_count': post_data.get('shareCount'),
            'timestamp': timestamp,
            'uploader_id': post_data.get('userId') or author_data.get('_id'),
            'uploader': author_data.get('name'),
            'uploader_url': url_or_none(uploader_url),
            'track': song_data.get('title'),
            'artist': song_data.get('author'),
            'formats': formats,
        }
class ChingariIE(ChingariBaseIE):
    # Single shared post: https://chingari.io/share/post?id=<post id>
    _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
    _TESTS = [{
        'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
        'info_dict': {
            'id': '612f8f4ce1dc57090e8a7beb',
            'ext': 'mp4',
            'title': 'Happy birthday Srila Prabhupada',
            'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa',
            'duration': 0,
            'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg',
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'repost_count': int,
            'timestamp': 1630506828,
            'upload_date': '20210901',
            'uploader_id': '5f0403982c8bd344f4813f8c',
            'uploader': 'ISKCON,Inc.',
            'uploader_url': 'https://chingari.io/iskcon,inc',
            'track': None,
            'artist': None,
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Fetch the post details for the id in `url` and return its info dict.

        Raises ExtractorError (expected) carrying the API's message when the
        response `code` is not 200.
        """
        post_id = self._match_id(url)
        response = self._download_json(
            f'https://api.chingari.io/post/post_details/{post_id}', post_id)

        if response['code'] == 200:
            return self._get_post(post_id, response['data'])
        raise ExtractorError(response['message'], expected=True)
class ChingariUserIE(ChingariBaseIE):
_VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
_TESTS = [{
'url': 'https://chingari.io/dada1023',
'info_dict': {
'id': 'dada1023',
},
'params': {'playlistend': 3},
'playlist': [{
'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a',
'info_dict': {
'id': '614781f3ade60b3a0bfff42a',
'ext': 'mp4',
'title': '#chingaribappa ',
'description': 'md5:d1df21d84088770468fa63afe3b17857',
'duration': 7,
'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg',
'like_count': int,
'view_count': int,
'comment_count': int,
'repost_count': int,
'timestamp': 1632076275,
'upload_date': '20210919',
'uploader_id': '5efc4b12cca35c3d1794c2d3',
'uploader': 'dada (girish) dhawale',
'uploader_url': 'https://chingari.io/dada1023',
'track': None,
'artist': None
},
'params': {'skip_download': True}
}, {
'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba',
'info_dict': {
'id': '6146b132bcbf860959e12cba',
'ext': 'mp4',
'title': 'Tactor harvesting',
'description': 'md5:8403f12dce68828b77ecee7eb7e887b7',
'duration': 59.3,
'thumbnail': 'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg',
'like_count': int,
'view_count': int,
'comment_count': int,
'repost_count': int,
'timestamp': 1632022834,
'upload_date': '20210919',
'uploader_id': '5efc4b12cca35c3d1794c2d3',
'uploader': 'dada (girish) dhawale',
'uploader_url': 'https://chingari.io/dada1023',
'track': None,
'artist': None
},
'params': {'skip_download': True}
}, {
'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82',
'info_dict': {
'id': '6145651b74cb030a64c40b82',
'ext': 'mp4',
'title': '#odiabhajan ',
'description': 'md5:687ea36835b9276cf2af90f25e7654cb',
'duration': 56.67,
'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg',
'like_count': int,
'view_count': int,
'comment_count': int,
'repost_count': int,
'timestamp': 1631937819,
'upload_date': '20210918',
'uploader_id': '5efc4b12cca35c3d1794c2d3',
'uploader': 'dada (girish) dhawale',
'uploader_url': 'https://chingari.io/dada1023',
'track': None,
'artist': None
},
'params': {'skip_download': True}
}],
}, {
'url': 'https://chingari.io/iskcon%2Cinc',
'playlist_mincount': 1025,
'info_dict': {
'id': 'iskcon%2Cinc',
},
}]
def _entries(self, id):
    """
    Yield every post of a Chingari user, requesting the post list page by page.

    @param id   User id; it is sent to the API as both `userId` and `ownerId`
    @returns    Generator of the values returned by `self._get_post` for each post

    Paging stops as soon as a response does not report `hasMoreData` as truthy.
    """
    page_size = 20
    for page in itertools.count():
        posts = self._download_json(
            'https://api.chingari.io/users/getPosts', id,
            data=json.dumps({
                'userId': id,
                'ownerId': id,
                # Offset of the first post of this page
                'skip': page * page_size,
                'limit': page_size,
            }).encode(),
            headers={'content-type': 'application/json;charset=UTF-8'},
            note='Downloading page %s' % page)
        # `data` may be absent or null when a page carries no posts
        for post in posts.get('data') or []:
            post_data = post['post']
            yield self._get_post(post_data['_id'], post_data)
        # A response without `hasMoreData` is treated as the last page
        # instead of aborting the whole playlist with a KeyError
        if not posts.get('hasMoreData'):
            break
def _real_extract(self, url):
    """Look up the user named in the URL and return that user's posts as a playlist."""
    handle = self._match_id(url)
    profile = self._download_json(f'https://api.chingari.io/user/{handle}', handle)
    if profile['code'] == 200:
        # The playlist keeps the id taken from the URL; the API's `_id` is only used for paging
        return self.playlist_result(
            self._entries(profile['data']['_id']), playlist_id=handle)
    # Failures are reported inside the JSON body; pass the API's message on to the user
    raise ExtractorError(profile['message'], expected=True)

@ -2,7 +2,7 @@ import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, UserNotLive,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
@ -40,7 +40,7 @@ class CHZZKLiveIE(InfoExtractor):
note='Downloading channel info', errnote='Unable to download channel info')['content'] note='Downloading channel info', errnote='Unable to download channel info')['content']
if live_detail.get('status') == 'CLOSE': if live_detail.get('status') == 'CLOSE':
raise ExtractorError('The channel is not currently live', expected=True) raise UserNotLive(video_id=channel_id)
live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id) live_playback = self._parse_json(live_detail['livePlaybackJson'], channel_id)

@ -2,6 +2,7 @@ from .hbo import HBOBaseIE
class CinemaxIE(HBOBaseIE): class CinemaxIE(HBOBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))' _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
_TESTS = [{ _TESTS = [{
'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903', 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',

@ -1,76 +0,0 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
url_or_none,
)
class CliphunterIE(InfoExtractor):
    # Handles cliphunter.com watch pages of the form /w/<numeric id>/<slug>
    IE_NAME = 'cliphunter'
    _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
        (?P<id>[0-9]+)/
        (?P<seo>.+?)(?:$|[#\?])
    '''
    _TESTS = [{
        'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo',
        'md5': 'b7c9bbd4eb3a226ab91093714dcaa480',
        'info_dict': {
            'id': '1012420',
            'ext': 'flv',
            'title': 'Fun Jynx Maze solo',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
        'skip': 'Video gone',
    }, {
        'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz',
        'md5': '55a723c67bfc6da6b0cfa00d55da8a27',
        'info_dict': {
            'id': '2019449',
            'ext': 'mp4',
            'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        """Scrape the watch page for its title, thumbnail and the `gexoFiles` format table."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'mediaTitle = "([^"]+)"', webpage, 'title')

        # The page embeds a JS object literal mapping a key to each available file
        gexo_json = self._search_regex(
            r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files')
        gexo_files = self._parse_json(gexo_json, video_id)

        formats = []
        for key, info in gexo_files.items():
            media_url = url_or_none(info.get('url'))
            if media_url:
                fmt, height = info.get('fmt'), info.get('h')
                formats.append({
                    'url': media_url,
                    # Use "<fmt>_<height>p" when both parts are present, else the table key
                    'format_id': '%s_%sp' % (fmt, height) if fmt and height else key,
                    'width': int_or_none(info.get('w')),
                    'height': int_or_none(height),
                    'tbr': int_or_none(info.get('br')),
                })

        # The thumbnail is optional: a failed match is non-fatal
        thumbnail = self._search_regex(
            r"var\s+mov_thumb\s*=\s*'([^']+)';",
            webpage, 'thumbnail', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'age_limit': self._rta_search(webpage),
            'thumbnail': thumbnail,
        }

@ -2,6 +2,7 @@ from .onet import OnetBaseIE
class ClipRsIE(OnetBaseIE): class ClipRsIE(OnetBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+' _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
_TEST = { _TEST = {
'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',

@ -4,6 +4,7 @@ from .common import InfoExtractor
class CloserToTruthIE(InfoExtractor): class CloserToTruthIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',

@ -4,27 +4,25 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor): class CloudflareStreamIE(InfoExtractor):
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = r'''(?x) _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
https?:// _EMBED_REGEX = [
(?: rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1',
(?:watch\.)?%s/| rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
%s ]
)
(?P<id>%s)
''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
_EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': { 'info_dict': {
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': 'm3u8',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -35,6 +33,21 @@ class CloudflareStreamIE(InfoExtractor):
}, { }, {
'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
'info_dict': {
'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
},
'params': {
'skip_download': 'm3u8',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

@ -1,68 +1,97 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class CNBCIE(InfoExtractor): class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'
_TEST = {
'url': 'http://video.cnbc.com/gallery/?video=3000503714', _TESTS = [{
'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
'info_dict': { 'info_dict': {
'id': '3000503714',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Fighting zombies is big business', 'id': '107344774',
'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', 'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
'timestamp': 1459332000, 'modified_timestamp': 1702053483,
'upload_date': '20160330', 'timestamp': 1701977810,
'uploader': 'NBCU-CNBC', 'channel': 'News Videos',
'upload_date': '20231207',
'description': 'md5:882c001d85cb43d7579b514307b3e78b',
'release_timestamp': 1701977375,
'modified_date': '20231208',
'release_date': '20231207',
'duration': 65,
'creators': ['Sean Conlon'],
'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
}, },
'params': { 'expected_warnings': ['Unable to download f4m manifest'],
# m3u8 download }, {
'skip_download': True, 'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
},
'skip': 'Dead link',
}
def _real_extract(self, url):
video_id = self._match_id(url)
return {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'url': smuggle_url(
'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
{'force_smil_url': True}),
'id': video_id,
}
class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': { 'info_dict': {
'id': '7000031301', 'creators': ['Jim Cramer'],
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 299.0,
'ext': 'mp4', 'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates", 'id': '107345451',
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', 'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
'timestamp': 1531958400, 'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
'upload_date': '20180719', 'timestamp': 1702080139,
'uploader': 'NBCU-CNBC', 'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
'release_date': '20231208',
'upload_date': '20231209',
'modified_timestamp': 1702080139,
'modified_date': '20231209',
'release_timestamp': 1702073551,
}, },
'params': { 'expected_warnings': ['Unable to download f4m manifest'],
'skip_download': True, }, {
'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
'info_dict': {
'creators': ['Jim Cramer'],
'channel': 'Mad Money with Jim Cramer',
'description': 'md5:72925be21b952e95eba51178dddf4e3e',
'duration': 113.0,
'ext': 'mp4',
'id': '107345474',
'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
'timestamp': 1702080535,
'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
'release_timestamp': 1702077347,
'modified_timestamp': 1702080535,
'release_date': '20231208',
'upload_date': '20231209',
'modified_date': '20231209',
}, },
'skip': 'Dead link', 'expected_warnings': ['Unable to download f4m manifest'],
} }]
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = self._match_valid_url(url).groups() display_id = self._match_id(url)
video_id = self._download_json( webpage = self._download_webpage(url, display_id)
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)
'query': '''{
page(path: "%s") { player_data = traverse_obj(data, (
vcpsId 'page', 'page', 'layout', ..., 'columns', ..., 'modules',
} lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False)
}''' % path,
})['data']['page']['vcpsId'] return {
return self.url_result( 'id': display_id,
'http://video.cnbc.com/gallery/?video=%d' % video_id, 'display_id': display_id,
CNBCIE.ie_key()) 'formats': self._extract_akamai_formats(player_data['playbackURL'], display_id),
**self._search_json_ld(webpage, display_id, fatal=False),
**traverse_obj(player_data, {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'creators': ('author', ..., 'name', {str}),
'timestamp': ('datePublished', {parse_iso8601}),
'release_timestamp': ('uploadDate', {parse_iso8601}),
'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
'thumbnail': ('thumbnail', {url_or_none}),
'duration': ('duration', {int_or_none}),
'channel': ('section', 'title', {str}),
}),
}

@ -247,6 +247,8 @@ class InfoExtractor:
(For internal use only) (For internal use only)
* http_chunk_size Chunk size for HTTP downloads * http_chunk_size Chunk size for HTTP downloads
* ffmpeg_args Extra arguments for ffmpeg downloader * ffmpeg_args Extra arguments for ffmpeg downloader
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url, RTMP formats can also have the additional fields: page_url,
app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
rtmp_protocol, rtmp_real_time rtmp_protocol, rtmp_real_time
@ -260,7 +262,7 @@ class InfoExtractor:
direct: True if a direct video file was given (must only be set by GenericIE) direct: True if a direct video file was given (must only be set by GenericIE)
alt_title: A secondary title of the video. alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily display_id: An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is unique, but available before title. Typically, id is
something like "4234987", title "Dancing naked mole rats", something like "4234987", title "Dancing naked mole rats",
and display_id "dancing-naked-mole-rats" and display_id "dancing-naked-mole-rats"
@ -278,7 +280,7 @@ class InfoExtractor:
description: Full video description. description: Full video description.
uploader: Full name of the video uploader. uploader: Full name of the video uploader.
license: License name the video is licensed under. license: License name the video is licensed under.
creator: The creator of the video. creators: List of creators of the video.
timestamp: UNIX timestamp of the moment the video was uploaded timestamp: UNIX timestamp of the moment the video was uploaded
upload_date: Video upload date in UTC (YYYYMMDD). upload_date: Video upload date in UTC (YYYYMMDD).
If not explicitly set, calculated from timestamp If not explicitly set, calculated from timestamp
@ -422,16 +424,16 @@ class InfoExtractor:
track_number: Number of the track within an album or a disc, as an integer. track_number: Number of the track within an album or a disc, as an integer.
track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
as a unicode string. as a unicode string.
artist: Artist(s) of the track. artists: List of artists of the track.
genre: Genre(s) of the track. composers: List of composers of the piece.
genres: List of genres of the track.
album: Title of the album the track belongs to. album: Title of the album the track belongs to.
album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
album_artist: List of all artists appeared on the album (e.g. album_artists: List of all artists appeared on the album.
"Ash Borer / Fell Voices" or "Various Artists", useful for splits E.g. ["Ash Borer", "Fell Voices"] or ["Various Artists"].
and compilations). Useful for splits and compilations.
disc_number: Number of the disc or other physical medium the track belongs to, disc_number: Number of the disc or other physical medium the track belongs to,
as an integer. as an integer.
composer: Composer of the piece
The following fields should only be set for clips that should be cut from the original video: The following fields should only be set for clips that should be cut from the original video:
@ -442,6 +444,18 @@ class InfoExtractor:
rows: Number of rows in each storyboard fragment, as an integer rows: Number of rows in each storyboard fragment, as an integer
columns: Number of columns in each storyboard fragment, as an integer columns: Number of columns in each storyboard fragment, as an integer
The following fields are deprecated and should not be set by new code:
composer: Use "composers" instead.
Composer(s) of the piece, comma-separated.
artist: Use "artists" instead.
Artist(s) of the track, comma-separated.
genre: Use "genres" instead.
Genre(s) of the track, comma-separated.
album_artist: Use "album_artists" instead.
All artists appeared on the album, comma-separated.
creator: Use "creators" instead.
The creator of the video.
Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information. Unless mentioned otherwise, None is equivalent to absence of information.
@ -2530,7 +2544,11 @@ class InfoExtractor:
self._report_ignoring_subs('DASH') self._report_ignoring_subs('DASH')
return fmts return fmts
def _extract_mpd_formats_and_subtitles( def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
periods = self._extract_mpd_periods(*args, **kwargs)
return self._merge_mpd_periods(periods)
def _extract_mpd_periods(
self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
fatal=True, data=None, headers={}, query={}): fatal=True, data=None, headers={}, query={}):
@ -2543,17 +2561,16 @@ class InfoExtractor:
errnote='Failed to download MPD manifest' if errnote is None else errnote, errnote='Failed to download MPD manifest' if errnote is None else errnote,
fatal=fatal, data=data, headers=headers, query=query) fatal=fatal, data=data, headers=headers, query=query)
if res is False: if res is False:
return [], {} return []
mpd_doc, urlh = res mpd_doc, urlh = res
if mpd_doc is None: if mpd_doc is None:
return [], {} return []
# We could have been redirected to a new url when we retrieved our mpd file. # We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.url mpd_url = urlh.url
mpd_base_url = base_url(mpd_url) mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles( return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)
mpd_doc, mpd_id, mpd_base_url, mpd_url)
def _parse_mpd_formats(self, *args, **kwargs): def _parse_mpd_formats(self, *args, **kwargs):
fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs) fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@ -2561,8 +2578,39 @@ class InfoExtractor:
self._report_ignoring_subs('DASH') self._report_ignoring_subs('DASH')
return fmts return fmts
def _parse_mpd_formats_and_subtitles( def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): periods = self._parse_mpd_periods(*args, **kwargs)
return self._merge_mpd_periods(periods)
def _merge_mpd_periods(self, periods):
    """
    Combine the formats and subtitles of all periods of an MPD manifest
    into a single formats list and a single subtitles dict.

    Formats whose values agree on every field other than 'format_id',
    'fragments' and 'manifest_stream_number' are treated as the same stream:
    the first one seen is kept and the fragments of later ones are appended
    to it. Every format dict is flagged in place with 'is_dash_periods'.

    @param periods  Iterable of dicts with 'formats' and 'subtitles' keys
    @returns        (formats, subtitles) tuple
    """
    ignored_fields = ('format_id', 'fragments', 'manifest_stream_number')
    merged_formats = {}
    merged_subtitles = {}

    for period in periods:
        for fmt in period['formats']:
            assert 'is_dash_periods' not in fmt, 'format already processed'
            fmt['is_dash_periods'] = True
            # Identity of a stream across periods: its values for all non-ignored fields
            signature = tuple(
                value for field, value in fmt.items() if field not in ignored_fields)
            if format_seen := signature in merged_formats:
                if 'fragments' in fmt:
                    merged_formats[signature].setdefault('fragments', []).extend(fmt['fragments'])
            if not format_seen:
                merged_formats[signature] = fmt

        period_subtitles = period['subtitles']
        # Warn when a second period also brings subtitles (checked before merging them)
        if merged_subtitles and period_subtitles:
            self.report_warning(bug_reports_message(
                'Found subtitles in multiple periods in the DASH manifest; '
                'if part of the subtitles are missing,'
            ), only_once=True)

        for lang, tracks in period_subtitles.items():
            merged_subtitles.setdefault(lang, []).extend(tracks)

    return list(merged_formats.values()), merged_subtitles
def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
""" """
Parse formats from MPD manifest. Parse formats from MPD manifest.
References: References:
@ -2641,9 +2689,13 @@ class InfoExtractor:
return ms_info return ms_info
mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
formats, subtitles = [], {}
stream_numbers = collections.defaultdict(int) stream_numbers = collections.defaultdict(int)
for period in mpd_doc.findall(_add_ns('Period')): for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
period_entry = {
'id': period.get('id', f'period-{period_idx}'),
'formats': [],
'subtitles': collections.defaultdict(list),
}
period_duration = parse_duration(period.get('duration')) or mpd_duration period_duration = parse_duration(period.get('duration')) or mpd_duration
period_ms_info = extract_multisegment_info(period, { period_ms_info = extract_multisegment_info(period, {
'start_number': 1, 'start_number': 1,
@ -2893,11 +2945,10 @@ class InfoExtractor:
if content_type in ('video', 'audio', 'image/jpeg'): if content_type in ('video', 'audio', 'image/jpeg'):
f['manifest_stream_number'] = stream_numbers[f['url']] f['manifest_stream_number'] = stream_numbers[f['url']]
stream_numbers[f['url']] += 1 stream_numbers[f['url']] += 1
formats.append(f) period_entry['formats'].append(f)
elif content_type == 'text': elif content_type == 'text':
subtitles.setdefault(lang or 'und', []).append(f) period_entry['subtitles'][lang or 'und'].append(f)
yield period_entry
return formats, subtitles
def _extract_ism_formats(self, *args, **kwargs): def _extract_ism_formats(self, *args, **kwargs):
fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)

@ -65,7 +65,7 @@ class CPACIE(InfoExtractor):
'title': title, 'title': title,
'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))), 'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))),
'timestamp': unified_timestamp(content['details'].get('liveDateTime')), 'timestamp': unified_timestamp(content['details'].get('liveDateTime')),
'category': [category] if category else None, 'categories': [category] if category else None,
'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))), 'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))),
'is_live': is_live(content['details'].get('type')), 'is_live': is_live(content['details'].get('type')),
} }

@ -1,12 +1,13 @@
import json
from .brightcove import BrightcoveNewIE from .brightcove import BrightcoveNewIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
dict_get, extract_attributes,
get_element_by_id, get_element_html_by_class,
js_to_json, get_element_text_and_html_by_tag,
traverse_obj,
) )
from ..utils.traversal import traverse_obj
class CraftsyIE(InfoExtractor): class CraftsyIE(InfoExtractor):
@ -41,28 +42,34 @@ class CraftsyIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_data = self._parse_json(self._search_regex( video_player = get_element_html_by_class('class-video-player', webpage)
r'class_video_player_vars\s*=\s*({.*})\s*;', video_data = traverse_obj(video_player, (
get_element_by_id('vidstore-classes_class-video-player-js-extra', webpage), {extract_attributes}, 'wire:snapshot', {json.loads}, 'data', {dict})) or {}
'video data'), video_id, transform_source=js_to_json) video_js = traverse_obj(video_player, (
{lambda x: get_element_text_and_html_by_tag('video-js', x)}, 1, {extract_attributes})) or {}
has_access = video_data.get('userHasAccess')
lessons = traverse_obj(video_data, ('lessons', ..., ..., lambda _, v: v['video_id']))
preview_id = video_js.get('data-video-id')
if preview_id and preview_id not in traverse_obj(lessons, (..., 'video_id')):
if not lessons and not has_access:
self.report_warning(
'Only extracting preview. For the full class, pass cookies '
+ f'from an account that has access. {self._login_hint()}')
lessons.append({'video_id': preview_id})
account_id = traverse_obj(video_data, ('video_player', 'bc_account_id')) if not lessons and not has_access:
self.raise_login_required('You do not have access to this class')
entries = [] account_id = video_data.get('accountId') or video_js['data-account']
class_preview = traverse_obj(video_data, ('video_player', 'class_preview'))
if class_preview:
v_id = class_preview.get('video_id')
entries.append(self.url_result(
f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={v_id}',
BrightcoveNewIE, v_id, class_preview.get('title')))
if dict_get(video_data, ('is_free', 'user_has_access')): def entries(lessons):
entries += [ for lesson in lessons:
self.url_result( yield self.url_result(
f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}', f'http://players.brightcove.net/{account_id}/default_default/index.html?videoId={lesson["video_id"]}',
BrightcoveNewIE, lesson['video_id'], lesson.get('title')) BrightcoveNewIE, lesson['video_id'], lesson.get('title'))
for lesson in video_data['lessons']]
return self.playlist_result( return self.playlist_result(
entries, video_id, video_data.get('class_title'), entries(lessons), video_id, self._html_search_meta(('og:title', 'twitter:title'), webpage),
self._html_search_meta(('og:description', 'description'), webpage, default=None)) self._html_search_meta(('og:description', 'description'), webpage, default=None))

@ -514,7 +514,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'track': 'Egaono Hana', 'track': 'Egaono Hana',
'artist': 'Goose house', 'artist': 'Goose house',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['J-Pop'], 'genres': ['J-Pop'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -527,7 +527,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'track': 'Crossing Field', 'track': 'Crossing Field',
'artist': 'LiSA', 'artist': 'LiSA',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['Anime'], 'genres': ['Anime'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -541,7 +541,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'artist': 'LiSA', 'artist': 'LiSA',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'description': 'md5:747444e7e6300907b7a43f0a0503072e', 'description': 'md5:747444e7e6300907b7a43f0a0503072e',
'genre': ['J-Pop'], 'genres': ['J-Pop'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -594,7 +594,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
'width': ('width', {int_or_none}), 'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}), 'height': ('height', {int_or_none}),
}), }),
'genre': ('genres', ..., 'displayValue'), 'genres': ('genres', ..., 'displayValue'),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}), 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}), }),
} }
@ -611,7 +611,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
'info_dict': { 'info_dict': {
'id': 'MA179CB50D', 'id': 'MA179CB50D',
'title': 'LiSA', 'title': 'LiSA',
'genre': ['J-Pop', 'Anime', 'Rock'], 'genres': ['J-Pop', 'Anime', 'Rock'],
'description': 'md5:16d87de61a55c3f7d6c454b73285938e', 'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
}, },
'playlist_mincount': 83, 'playlist_mincount': 83,
@ -645,6 +645,6 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
'width': ('width', {int_or_none}), 'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}), 'height': ('height', {int_or_none}),
}), }),
'genre': ('genres', ..., 'displayValue'), 'genres': ('genres', ..., 'displayValue'),
}), }),
} }

@ -114,7 +114,7 @@ class CybraryCourseIE(CybraryBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies', 'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
'info_dict': { 'info_dict': {
'id': 898, 'id': '898',
'title': 'AZ-500: Microsoft Azure Security Technologies', 'title': 'AZ-500: Microsoft Azure Security Technologies',
'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4' 'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4'
}, },
@ -122,7 +122,7 @@ class CybraryCourseIE(CybraryBaseIE):
}, { }, {
'url': 'https://app.cybrary.it/browse/course/cybrary-orientation', 'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
'info_dict': { 'info_dict': {
'id': 1245, 'id': '1245',
'title': 'Cybrary Orientation', 'title': 'Cybrary Orientation',
'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e' 'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e'
}, },

@ -1,6 +1,7 @@
import functools import functools
import json import json
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
@ -44,36 +45,41 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit')) self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off') self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
def _get_token(self, xid):
    """
    Return a token for the Dailymotion GraphQL API.

    A token already stored in the 'access_token' or 'client_token' cookie is
    reused. Otherwise one is requested from the OAuth endpoint, using the
    password grant when login info is available and the client-credentials
    grant when it is not, and then stored in the matching cookie.

    @param xid  Id passed to `_parse_json` when decoding an error response
    @raises ExtractorError  With the API's `error_description` (expected=True)
                            when the token request fails with HTTP 400
    """
    cookies = self._get_dailymotion_cookies()
    cached = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
    if cached:
        return cached

    username, password = self._get_login_info()
    payload = {
        'client_id': 'f1a362d288c1b98099c7',
        'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
    }
    if username:
        payload['grant_type'] = 'password'
        payload['password'] = password
        payload['username'] = username
    else:
        payload['grant_type'] = 'client_credentials'

    try:
        response = self._download_json(
            'https://graphql.api.dailymotion.com/oauth/token',
            None, 'Downloading Access Token',
            data=urlencode_postdata(payload))
        token = response['access_token']
    except ExtractorError as e:
        cause = e.cause
        if not (isinstance(cause, HTTPError) and cause.status == 400):
            raise
        # HTTP 400 carries a JSON body with a human-readable reason
        raise ExtractorError(self._parse_json(
            cause.response.read().decode(), xid)['error_description'], expected=True)

    # Store the token under the cookie that matches how it was obtained
    cookie_name = 'access_token' if username else 'client_token'
    self._set_dailymotion_cookie(cookie_name, token)
    return token
def _call_api(self, object_type, xid, object_fields, note, filter_extra=None): def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
if not self._HEADERS.get('Authorization'): if not self._HEADERS.get('Authorization'):
cookies = self._get_dailymotion_cookies() self._HEADERS['Authorization'] = f'Bearer {self._get_token(xid)}'
token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
if not token:
data = {
'client_id': 'f1a362d288c1b98099c7',
'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
}
username, password = self._get_login_info()
if username:
data.update({
'grant_type': 'password',
'password': password,
'username': username,
})
else:
data['grant_type'] = 'client_credentials'
try:
token = self._download_json(
'https://graphql.api.dailymotion.com/oauth/token',
None, 'Downloading Access Token',
data=urlencode_postdata(data))['access_token']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError(self._parse_json(
e.cause.response.read().decode(), xid)['error_description'], expected=True)
raise
self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
self._HEADERS['Authorization'] = 'Bearer ' + token
resp = self._download_json( resp = self._download_json(
'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({ 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
@ -393,9 +399,55 @@ class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
yield '//dailymotion.com/playlist/%s' % p yield '//dailymotion.com/playlist/%s' % p
class DailymotionSearchIE(DailymotionPlaylistBaseIE):
    """Playlist extractor for Dailymotion video search result pages."""

    IE_NAME = 'dailymotion:search'
    _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/search/(?P<id>[^/?#]+)/videos'
    # Number of results requested per API call; also the page size used for lazy paging
    _PAGE_SIZE = 20
    _TESTS = [{
        'url': 'http://www.dailymotion.com/search/king of turtles/videos',
        'info_dict': {
            'id': 'king of turtles',
            'title': 'king of turtles',
        },
        'playlist_mincount': 90,
    }]
    _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '

    def _call_search_api(self, term, page, note):
        """Run the search query for ``term`` and return the ``data.search`` object.

        ``page`` is sent to the API unchanged and ``note`` is the progress
        message shown while downloading. An Authorization header is added
        to ``self._HEADERS`` first if none is set yet.

        Raises ExtractorError with the first API error message, or a
        generic message, when the response has no ``data.search`` dict.
        """
        if not self._HEADERS.get('Authorization'):
            self._HEADERS['Authorization'] = f'Bearer {self._get_token(term)}'

        resp = self._download_json(
            'https://graphql.api.dailymotion.com/', None, note, data=json.dumps({
                'operationName': 'SEARCH_QUERY',
                'query': self._SEARCH_QUERY,
                'variables': {
                    # Must match the page size the paged list is built with,
                    # otherwise results would be skipped or repeated
                    'limit': self._PAGE_SIZE,
                    'page': page,
                    'query': term,
                },
            }).encode(), headers=self._HEADERS)

        obj = traverse_obj(resp, ('data', 'search', {dict}))
        if not obj:
            raise ExtractorError(
                traverse_obj(resp, ('errors', 0, 'message', {str})) or 'Could not fetch search data')
        return obj

    def _fetch_page(self, term, page):
        """Yield a url_result for every video on one page of search results.

        The incoming ``page`` index is incremented by one before it is sent
        to the API.
        """
        page += 1
        response = self._call_search_api(term, page, f'Searching "{term}" page {page}')
        for xid in traverse_obj(response, ('videos', 'edges', ..., 'node', 'xid')):
            yield self.url_result(f'https://www.dailymotion.com/video/{xid}', DailymotionIE, xid)

    def _real_extract(self, url):
        """Return a lazily paged playlist whose ID and title are the decoded search term."""
        term = urllib.parse.unquote_plus(self._match_id(url))
        return self.playlist_result(
            OnDemandPagedList(functools.partial(self._fetch_page, term), self._PAGE_SIZE), term, term)
class DailymotionUserIE(DailymotionPlaylistBaseIE): class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user' IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv', 'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': { 'info_dict': {

@ -83,7 +83,6 @@ class DamtomoRecordIE(DamtomoBaseIE):
'info_dict': { 'info_dict': {
'id': '27376862', 'id': '27376862',
'title': 'イカSUMMER [良音]', 'title': 'イカSUMMER [良音]',
'description': None,
'uploader': '', 'uploader': '',
'uploader_id': 'MzAyMDExNTY', 'uploader_id': 'MzAyMDExNTY',
'upload_date': '20210721', 'upload_date': '20210721',

@ -27,7 +27,7 @@ class DaumIE(DaumBaseIE):
'duration': 2117, 'duration': 2117,
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'uploader_id': 186139, 'uploader_id': '186139',
'uploader': '콘간지', 'uploader': '콘간지',
'timestamp': 1387310323, 'timestamp': 1387310323,
}, },
@ -44,7 +44,7 @@ class DaumIE(DaumBaseIE):
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'uploader': 'MBC 예능', 'uploader': 'MBC 예능',
'uploader_id': 132251, 'uploader_id': '132251',
'timestamp': 1421604228, 'timestamp': 1421604228,
}, },
}, { }, {
@ -63,7 +63,7 @@ class DaumIE(DaumBaseIE):
'view_count': int, 'view_count': int,
'comment_count': int, 'comment_count': int,
'uploader': '까칠한 墮落始祖 황비홍님의', 'uploader': '까칠한 墮落始祖 황비홍님의',
'uploader_id': 560824, 'uploader_id': '560824',
'timestamp': 1203770745, 'timestamp': 1203770745,
}, },
}, { }, {
@ -77,7 +77,7 @@ class DaumIE(DaumBaseIE):
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회', 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'upload_date': '20170129', 'upload_date': '20170129',
'uploader': '쇼! 음악중심', 'uploader': '쇼! 음악중심',
'uploader_id': 2653210, 'uploader_id': '2653210',
'timestamp': 1485684628, 'timestamp': 1485684628,
}, },
}] }]
@ -107,7 +107,7 @@ class DaumClipIE(DaumBaseIE):
'duration': 3868, 'duration': 3868,
'view_count': int, 'view_count': int,
'uploader': 'GOMeXP', 'uploader': 'GOMeXP',
'uploader_id': 6667, 'uploader_id': '6667',
'timestamp': 1377911092, 'timestamp': 1377911092,
}, },
}, { }, {

@ -1,54 +0,0 @@
from .common import InfoExtractor
from ..utils import js_to_json
class DiggIE(InfoExtractor):
    """Resolve digg.com video pages to the extractor of the embedded provider."""

    _VALID_URL = r'https?://(?:www\.)?digg\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # JWPlatform via provider
        'url': 'http://digg.com/video/sci-fi-short-jonah-daniel-kaluuya-get-out',
        'info_dict': {
            'id': 'LcqvmS0b',
            'ext': 'mp4',
            'title': "'Get Out' Star Daniel Kaluuya Goes On 'Moby Dick'-Like Journey In Sci-Fi Short 'Jonah'",
            'description': 'md5:541bb847648b6ee3d6514bc84b82efda',
            'upload_date': '20180109',
            'timestamp': 1515530551,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Youtube via provider
        'url': 'http://digg.com/video/dog-boat-seal-play',
        'only_matching': True,
    }, {
        # vimeo as regular embed
        'url': 'http://digg.com/video/dream-girl-short-film',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to Youtube or JWPlatform when the page names one, else to Generic.

        The provider and video ID are read from the page's ``video_info``
        JavaScript object. Pages without usable ``video_info`` are handed
        to the generic extractor.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # With fatal=False a malformed object does not raise and no dict comes
        # back, so coalesce to {} to keep the Generic fallback reachable
        info = self._parse_json(
            self._search_regex(
                r'(?s)video_info\s*=\s*({.+?});\n', webpage, 'video info',
                default='{}'),
            display_id, transform_source=js_to_json, fatal=False) or {}

        video_id = info.get('video_id')
        if video_id:
            provider = info.get('provider_name')
            if provider == 'youtube':
                return self.url_result(
                    video_id, ie='Youtube', video_id=video_id)
            if provider == 'jwplayer':
                return self.url_result(
                    f'jwplatform:{video_id}', ie='JWPlatform',
                    video_id=video_id)

        return self.url_result(url, 'Generic')

@ -9,6 +9,7 @@ from ..utils import (
class DTubeIE(InfoExtractor): class DTubeIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})' _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
_TEST = { _TEST = {
'url': 'https://d.tube/#!/v/broncnutz/x380jtr1', 'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',

@ -8,9 +8,9 @@ from ..utils import (
class DumpertIE(InfoExtractor): class DumpertIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?: (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:
/(?:mediabase|embed|item)/| (?:mediabase|embed|item)/|
(?:/toppers|/latest|/?)\?selectedId= [^#]*[?&]selectedId=
)(?P<id>[0-9]+[/_][0-9a-zA-Z]+)''' )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.dumpert.nl/item/6646981_951bc60f', 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
@ -56,6 +56,9 @@ class DumpertIE(InfoExtractor):
}, { }, {
'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185', 'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dumpert.nl/toppers/dag?selectedId=100086074_f5cef3ac',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

@ -32,7 +32,7 @@ class DuoplayIE(InfoExtractor):
'season_number': 2, 'season_number': 2,
'episode': 'Operatsioon "Öö"', 'episode': 'Operatsioon "Öö"',
'episode_number': 12, 'episode_number': 12,
'episode_id': 24, 'episode_id': '24',
}, },
}, { }, {
'note': 'Empty title', 'note': 'Empty title',
@ -50,7 +50,7 @@ class DuoplayIE(InfoExtractor):
'series_id': '17', 'series_id': '17',
'season': 'Season 2', 'season': 'Season 2',
'season_number': 2, 'season_number': 2,
'episode_id': 14, 'episode_id': '14',
'release_year': 2010, 'release_year': 2010,
}, },
}, { }, {
@ -99,6 +99,6 @@ class DuoplayIE(InfoExtractor):
'season_number': ('season_id', {int_or_none}), 'season_number': ('season_id', {int_or_none}),
'episode': 'subtitle', 'episode': 'subtitle',
'episode_number': ('episode_nr', {int_or_none}), 'episode_number': ('episode_nr', {int_or_none}),
'episode_id': ('episode_id', {int_or_none}), 'episode_id': ('episode_id', {str_or_none}),
}, get_all=False) if episode_attr.get('category') != 'movies' else {}), }, get_all=False) if episode_attr.get('category') != 'movies' else {}),
} }

@ -8,6 +8,8 @@ from ..compat import compat_urlparse
class DWIE(InfoExtractor): class DWIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_NAME = 'dw' IE_NAME = 'dw'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
@ -82,6 +84,8 @@ class DWIE(InfoExtractor):
class DWArticleIE(InfoExtractor): class DWArticleIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
IE_NAME = 'dw:article' IE_NAME = 'dw:article'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
_TEST = { _TEST = {

@ -42,7 +42,6 @@ class EplusIbIE(InfoExtractor):
'live_status': 'was_live', 'live_status': 'was_live',
'release_date': '20210719', 'release_date': '20210719',
'release_timestamp': 1626703200, 'release_timestamp': 1626703200,
'description': None,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

@ -13,6 +13,7 @@ from ..utils import (
class EuropaIE(InfoExtractor): class EuropaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)' _VALID_URL = r'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758', 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',

@ -10,6 +10,7 @@ from ..utils import (
class FancodeVodIE(InfoExtractor): class FancodeVodIE(InfoExtractor):
_WORKING = False
IE_NAME = 'fancode:vod' IE_NAME = 'fancode:vod'
_VALID_URL = r'https?://(?:www\.)?fancode\.com/video/(?P<id>[0-9]+)\b' _VALID_URL = r'https?://(?:www\.)?fancode\.com/video/(?P<id>[0-9]+)\b'
@ -126,6 +127,7 @@ class FancodeVodIE(InfoExtractor):
class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE
_WORKING = False
IE_NAME = 'fancode:live' IE_NAME = 'fancode:live'
_VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+' _VALID_URL = r'https?://(www\.)?fancode\.com/match/(?P<id>[0-9]+).+'

@ -1,69 +0,0 @@
from .common import InfoExtractor
from ..utils import int_or_none
class FilmmoduIE(InfoExtractor):
    """Extractor for filmmodu.org movie pages (dubbed or subtitled)."""

    _VALID_URL = r'https?://(?:www\.)?filmmodu\.org/(?P<id>[^/]+-(?:turkce-dublaj-izle|altyazili-izle))'
    _TESTS = [{
        'url': 'https://www.filmmodu.org/f9-altyazili-izle',
        'md5': 'aeefd955c2a508a5bdaa3bcec8eeb0d4',
        'info_dict': {
            'id': '10804',
            'ext': 'mp4',
            'title': 'F9',
            'description': 'md5:2713f584a4d65afa2611e2948d0b953c',
            'subtitles': {
                'tr': [{
                    'ext': 'vtt',
                }],
            },
            'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/10804/xXHZeb1yhJvnSHPzZDqee0zfMb6.jpg',
        },
    }, {
        'url': 'https://www.filmmodu.org/the-godfather-turkce-dublaj-izle',
        'md5': '109f2fcb9c941330eed133971c035c00',
        'info_dict': {
            'id': '3646',
            'ext': 'mp4',
            'title': 'Baba',
            'description': 'md5:d43fd651937cd75cc650883ebd8d8461',
            'thumbnail': r're:https://s[0-9]+.filmmodu.org/uploads/movie/cover/3646/6xKCYgH16UuwEGAyroLU6p8HLIn.jpg',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's Open Graph tags and the get-source API.

        The numeric movie ID and the video type are scraped from inline
        JavaScript and passed to ``/get-source``, which supplies the
        format list and an optional subtitle URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Page metadata is read before the API request, title first, so a
        # page without a title fails before anything else is attempted
        og_title = self._og_search_title(webpage, fatal=True)
        og_description = self._og_search_description(webpage)
        og_thumbnail = self._og_search_thumbnail(webpage)
        movie_id = self._search_regex(r'var\s*videoId\s*=\s*\'([0-9]+)\'', webpage, 'video_id')
        movie_type = self._search_regex(r'var\s*videoType\s*=\s*\'([a-z]+)\'', webpage, 'video_type')

        source_data = self._download_json(
            'https://www.filmmodu.org/get-source', movie_id,
            query={
                'movie_id': movie_id,
                'type': movie_type,
            })

        formats = []
        for entry in source_data['sources']:
            formats.append({
                'url': entry['src'],
                'ext': 'mp4',
                'format_id': entry['label'],
                'height': int_or_none(entry.get('res')),
                'protocol': 'm3u8_native',
            })

        # The API provides at most one subtitle track, exposed under 'tr'
        subtitle_url = source_data.get('subtitle')
        subtitles = {'tr': [{'url': subtitle_url}]} if subtitle_url else {}

        return {
            'id': movie_id,
            'display_id': display_id,
            'title': og_title,
            'description': og_description,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': og_thumbnail,
        }

@ -1,60 +1,49 @@
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError,
determine_ext, determine_ext,
filter_dict,
format_field, format_field,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
parse_iso8601, parse_iso8601,
parse_qs, smuggle_url,
unsmuggle_url,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
def _make_url_result(self, video_or_full_id, catalog=None): def _make_url_result(self, video_id, url=None):
full_id = 'francetv:%s' % video_or_full_id video_id = video_id.split('@')[0] # for compat with old @catalog IDs
if '@' not in video_or_full_id and catalog: full_id = f'francetv:{video_id}'
full_id += '@%s' % catalog if url:
return self.url_result( full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname})
full_id, ie=FranceTVIE.ie_key(), return self.url_result(full_id, FranceTVIE, video_id)
video_id=video_or_full_id.split('@')[0])
class FranceTVIE(InfoExtractor): class FranceTVIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'francetv:(?P<id>[^@#]+)'
(?: _GEO_COUNTRIES = ['FR']
https?:// _GEO_BYPASS = False
sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
.*?\bidDiffusion=[^&]+|
(?:
https?://videos\.francetv\.fr/video/|
francetv:
)
(?P<id>[^@]+)(?:@(?P<catalog>.+))?
)
'''
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1']
_TESTS = [{ _TESTS = [{
# without catalog 'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
'info_dict': { 'info_dict': {
'id': '162311093', 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4', 'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus', 'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1502623500, 'timestamp': 1502623500,
'duration': 2580,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170813', 'upload_date': '20170813',
}, },
}, { 'params': {'skip_download': 'm3u8'},
# with catalog
'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
'only_matching': True,
}, {
'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
'only_matching': True,
}, { }, {
'url': 'francetv:162311093', 'url': 'francetv:162311093',
'only_matching': True, 'only_matching': True,
@ -76,10 +65,7 @@ class FranceTVIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _extract_video(self, video_id, catalogue=None): def _extract_video(self, video_id, hostname=None):
# Videos are identified by idDiffusion so catalogue part is optional.
# However when provided, some extra formats may be returned so we pass
# it if available.
is_live = None is_live = None
videos = [] videos = []
title = None title = None
@ -91,18 +77,20 @@ class FranceTVIE(InfoExtractor):
timestamp = None timestamp = None
spritesheets = None spritesheets = None
for device_type in ('desktop', 'mobile'): # desktop+chrome returns dash; mobile+safari returns hls
for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]:
dinfo = self._download_json( dinfo = self._download_json(
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, f'https://k7.ftven.fr/videos/{video_id}', video_id,
video_id, 'Downloading %s video JSON' % device_type, query={ f'Downloading {device_type} {browser} video JSON', query=filter_dict({
'device_type': device_type, 'device_type': device_type,
'browser': 'chrome', 'browser': browser,
}, fatal=False) 'domain': hostname,
}), fatal=False)
if not dinfo: if not dinfo:
continue continue
video = dinfo.get('video') video = traverse_obj(dinfo, ('video', {dict}))
if video: if video:
videos.append(video) videos.append(video)
if duration is None: if duration is None:
@ -112,7 +100,7 @@ class FranceTVIE(InfoExtractor):
if spritesheets is None: if spritesheets is None:
spritesheets = video.get('spritesheets') spritesheets = video.get('spritesheets')
meta = dinfo.get('meta') meta = traverse_obj(dinfo, ('meta', {dict}))
if meta: if meta:
if title is None: if title is None:
title = meta.get('title') title = meta.get('title')
@ -126,43 +114,46 @@ class FranceTVIE(InfoExtractor):
if timestamp is None: if timestamp is None:
timestamp = parse_iso8601(meta.get('broadcasted_at')) timestamp = parse_iso8601(meta.get('broadcasted_at'))
formats = [] formats, subtitles, video_url = [], {}, None
subtitles = {} for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
for video in videos: video_url = video['url']
format_id = video.get('format') format_id = video.get('format')
video_url = None if token_url := url_or_none(video.get('token')):
if video.get('workflow') == 'token-akamai': tokenized_url = traverse_obj(self._download_json(
token_url = video.get('token') token_url, video_id, f'Downloading signed {format_id} manifest URL',
if token_url: fatal=False, query={
token_json = self._download_json( 'format': 'json',
token_url, video_id, 'url': video_url,
'Downloading signed %s manifest URL' % format_id) }), ('url', {url_or_none}))
if token_json: if tokenized_url:
video_url = token_json.get('url') video_url = tokenized_url
if not video_url:
video_url = video.get('url')
ext = determine_ext(video_url) ext = determine_ext(video_url)
if ext == 'f4m': if ext == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False)) video_url, video_id, f4m_id=format_id or ext, fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
format_id = format_id or 'hls'
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
entry_protocol='m3u8_native', m3u8_id=format_id, for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
fatal=False) if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P<bitrate>\d+)', f['format_id']):
f.update({
'tbr': int_or_none(mobj.group('bitrate')),
'acodec': 'mp4a',
})
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd': elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles( fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, mpd_id=format_id, fatal=False) video_url, video_id, mpd_id=format_id or 'dash', fatal=False)
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)
elif video_url.startswith('rtmp'): elif video_url.startswith('rtmp'):
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': 'rtmp-%s' % format_id, 'format_id': join_nonempty('rtmp', format_id),
'ext': 'flv', 'ext': 'flv',
}) })
else: else:
@ -174,6 +165,13 @@ class FranceTVIE(InfoExtractor):
# XXX: what is video['captions']? # XXX: what is video['captions']?
if not formats and video_url:
urlh = self._request_webpage(
HEADRequest(video_url), video_id, 'Checking for geo-restriction',
fatal=False, expected_status=403)
if urlh and urlh.headers.get('x-errortype') == 'geo':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
for f in formats: for f in formats:
if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'): if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'):
f['language_preference'] = -10 f['language_preference'] = -10
@ -194,7 +192,7 @@ class FranceTVIE(InfoExtractor):
# a 10×10 grid of thumbnails corresponding to approximately # a 10×10 grid of thumbnails corresponding to approximately
# 2 seconds of the video; the last spritesheet may be shorter # 2 seconds of the video; the last spritesheet may be shorter
'duration': 200, 'duration': 200,
} for sheet in spritesheets] } for sheet in traverse_obj(spritesheets, (..., {url_or_none}))]
}) })
return { return {
@ -210,21 +208,15 @@ class FranceTVIE(InfoExtractor):
'series': title if episode_number else None, 'series': title if episode_number else None,
'episode_number': int_or_none(episode_number), 'episode_number': int_or_none(episode_number),
'season_number': int_or_none(season_number), 'season_number': int_or_none(season_number),
'_format_sort_fields': ('res', 'tbr', 'proto'), # prioritize m3u8 over dash
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) url, smuggled_data = unsmuggle_url(url, {})
video_id = mobj.group('id') video_id = self._match_id(url)
catalog = mobj.group('catalog') hostname = smuggled_data.get('hostname') or 'www.france.tv'
if not video_id: return self._extract_video(video_id, hostname=hostname)
qs = parse_qs(url)
video_id = qs.get('idDiffusion', [None])[0]
catalog = qs.get('catalogue', [None])[0]
if not video_id:
raise ExtractorError('Invalid URL', expected=True)
return self._extract_video(video_id, catalog)
class FranceTVSiteIE(FranceTVBaseInfoExtractor): class FranceTVSiteIE(FranceTVBaseInfoExtractor):
@ -246,6 +238,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
}, },
'add_ie': [FranceTVIE.ie_key()], 'add_ie': [FranceTVIE.ie_key()],
}, { }, {
# geo-restricted
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html', 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
'info_dict': { 'info_dict': {
'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44', 'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44',
@ -261,6 +254,26 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1441, 'duration': 1441,
}, },
}, {
# geo-restricted livestream (workflow == 'token-akamai')
'url': 'https://www.france.tv/france-4/direct.html',
'info_dict': {
'id': '9a6a7670-dde9-4264-adbc-55b89558594b',
'ext': 'mp4',
'title': r're:France 4 en direct .+',
'live_status': 'is_live',
},
'skip': 'geo-restricted livestream',
}, {
# livestream (workflow == 'dai')
'url': 'https://www.france.tv/france-2/direct.html',
'info_dict': {
'id': '006194ea-117d-4bcf-94a9-153d999c59ae',
'ext': 'mp4',
'title': r're:France 2 en direct .+',
'live_status': 'is_live',
},
'params': {'skip_download': 'livestream'},
}, { }, {
# france3 # france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
@ -277,10 +290,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
# franceo # franceo
'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html', 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
'only_matching': True, 'only_matching': True,
}, {
# france2 live
'url': 'https://www.france.tv/france-2/direct.html',
'only_matching': True,
}, { }, {
'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html', 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
'only_matching': True, 'only_matching': True,
@ -304,17 +313,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
catalogue = None
video_id = self._search_regex( video_id = self._search_regex(
r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1', r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'video id', default=None, group='id') webpage, 'video id', default=None, group='id')
if not video_id: if not video_id:
video_id, catalogue = self._html_search_regex( video_id = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"',
webpage, 'video ID').split('@') webpage, 'video ID')
return self._make_url_result(video_id, catalogue) return self._make_url_result(video_id, url=url)
class FranceTVInfoIE(FranceTVBaseInfoExtractor): class FranceTVInfoIE(FranceTVBaseInfoExtractor):
@ -328,8 +336,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Soir 3', 'title': 'Soir 3',
'upload_date': '20190822', 'upload_date': '20190822',
'timestamp': 1566510900, 'timestamp': 1566510730,
'description': 'md5:72d167097237701d6e8452ff03b83c00', 'thumbnail': r're:^https?://.*\.jpe?g$',
'duration': 1637,
'subtitles': { 'subtitles': {
'fr': 'mincount:2', 'fr': 'mincount:2',
}, },
@ -344,8 +353,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
'info_dict': { 'info_dict': {
'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482', 'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Covid-19 : une situation catastrophique à New Dehli', 'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021',
'thumbnail': str, 'thumbnail': r're:^https?://.*\.jpe?g$',
'duration': 76, 'duration': 76,
'timestamp': 1619028518, 'timestamp': 1619028518,
'upload_date': '20210421', 'upload_date': '20210421',
@ -371,11 +380,17 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
'id': 'x4iiko0', 'id': 'x4iiko0',
'ext': 'mp4', 'ext': 'mp4',
'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen', 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016', 'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e',
'timestamp': 1467011958, 'timestamp': 1467011958,
'upload_date': '20160627',
'uploader': 'France Inter', 'uploader': 'France Inter',
'uploader_id': 'x2q2ez', 'uploader_id': 'x2q2ez',
'upload_date': '20160627',
'view_count': int,
'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'],
'age_limit': 0,
'duration': 640,
'like_count': int,
'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080',
}, },
'add_ie': ['Dailymotion'], 'add_ie': ['Dailymotion'],
}, { }, {
@ -405,4 +420,4 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'), r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
webpage, 'video id') webpage, 'video id')
return self._make_url_result(video_id) return self._make_url_result(video_id, url=url)

@ -301,7 +301,7 @@ class FunimationShowIE(FunimationBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://www.funimation.com/en/shows/sk8-the-infinity', 'url': 'https://www.funimation.com/en/shows/sk8-the-infinity',
'info_dict': { 'info_dict': {
'id': 1315000, 'id': '1315000',
'title': 'SK8 the Infinity' 'title': 'SK8 the Infinity'
}, },
'playlist_count': 13, 'playlist_count': 13,
@ -312,7 +312,7 @@ class FunimationShowIE(FunimationBaseIE):
# without lang code # without lang code
'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/', 'url': 'https://www.funimation.com/shows/ouran-high-school-host-club/',
'info_dict': { 'info_dict': {
'id': 39643, 'id': '39643',
'title': 'Ouran High School Host Club' 'title': 'Ouran High School Host Club'
}, },
'playlist_count': 26, 'playlist_count': 26,
@ -339,7 +339,7 @@ class FunimationShowIE(FunimationBaseIE):
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': show_info['id'], 'id': str_or_none(show_info['id']),
'title': show_info['name'], 'title': show_info['name'],
'entries': orderedSet( 'entries': orderedSet(
self.url_result( self.url_result(

@ -19,7 +19,6 @@ class GabTVIE(InfoExtractor):
'id': '61217eacea5665de450d0488', 'id': '61217eacea5665de450d0488',
'ext': 'mp4', 'ext': 'mp4',
'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY', 'title': 'WHY WAS AMERICA IN AFGHANISTAN - AMERICA FIRST AGAINST AMERICAN OLIGARCHY',
'description': None,
'uploader': 'Wurzelroot', 'uploader': 'Wurzelroot',
'uploader_id': '608fb0a85738fd1974984f7d', 'uploader_id': '608fb0a85738fd1974984f7d',
'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488', 'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488',

@ -1,46 +0,0 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
get_element_by_id,
)
class GameInformerIE(InfoExtractor):
    """Resolve gameinformer.com pages to their embedded Brightcove video."""

    _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
    _TESTS = [{
        # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
        'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
        'md5': '292f26da1ab4beb4c9099f1304d2b071',
        'info_dict': {
            'id': '4515472681001',
            'ext': 'mp4',
            'title': 'Replay - Animal Crossing',
            'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
            'timestamp': 1443457610,
            'upload_date': '20150928',
            'uploader_id': '694940074001',
        },
    }, {
        # Brightcove id inside unique element with field--name-field-brightcove-video-id class
        'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
        'info_dict': {
            'id': '6057111913001',
            'ext': 'mp4',
            'title': 'New Gameplay Today Streets Of Rogue',
            'timestamp': 1562699001,
            'upload_date': '20190709',
            'uploader_id': '694940074001',
        },
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a url_result that hands the page's video to BrightcoveNew.

        The Brightcove ID is taken from the dedicated field element or,
        failing that, from the ``video-source-content`` element. When
        neither yields an ID, the player URL is taken from whatever
        BrightcoveNewIE._extract_url finds in the page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        id_markup = get_element_by_class('field--name-field-brightcove-video-id', webpage)
        if not id_markup:
            id_markup = get_element_by_id('video-source-content', webpage)
        brightcove_id = clean_html(id_markup)

        if brightcove_id:
            brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id
        else:
            brightcove_url = BrightcoveNewIE._extract_url(self, webpage)
        return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)

@ -267,9 +267,9 @@ class GameJoltIE(GameJoltBaseIE):
'id': 'dszyjnwi', 'id': 'dszyjnwi',
'ext': 'webm', 'ext': 'webm',
'title': 'gif-presentacion-mejorado-dszyjnwi', 'title': 'gif-presentacion-mejorado-dszyjnwi',
'n_entries': 1,
} }
}] }],
'playlist_count': 1,
}, { }, {
# Multiple GIFs # Multiple GIFs
'url': 'https://gamejolt.com/p/gif-yhsqkumq', 'url': 'https://gamejolt.com/p/gif-yhsqkumq',
@ -374,7 +374,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'info_dict': { 'info_dict': {
'id': '657899', 'id': '657899',
'title': 'Friday Night Funkin\': Vs Oswald', 'title': 'Friday Night Funkin\': Vs Oswald',
'n_entries': None,
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
@ -384,7 +383,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'url': r're:^https://.+vs-oswald-menu-music\.mp3$', 'url': r're:^https://.+vs-oswald-menu-music\.mp3$',
'release_timestamp': 1635190816, 'release_timestamp': 1635190816,
'release_date': '20211025', 'release_date': '20211025',
'n_entries': 3,
} }
}, { }, {
'info_dict': { 'info_dict': {
@ -394,7 +392,6 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$', 'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$',
'release_timestamp': 1635190841, 'release_timestamp': 1635190841,
'release_date': '20211025', 'release_date': '20211025',
'n_entries': 3,
} }
}, { }, {
'info_dict': { 'info_dict': {
@ -404,9 +401,9 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE):
'url': r're:^https://.+last-straw\.mp3$', 'url': r're:^https://.+last-straw\.mp3$',
'release_timestamp': 1635881104, 'release_timestamp': 1635881104,
'release_date': '20211102', 'release_date': '20211102',
'n_entries': 3,
} }
}] }],
'playlist_count': 3,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

@ -21,7 +21,6 @@ class GaskrankIE(InfoExtractor):
'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden', 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden',
'uploader_id': 'Bikefun', 'uploader_id': 'Bikefun',
'upload_date': '20170110', 'upload_date': '20170110',
'uploader_url': None,
} }
}, { }, {
'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm', 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm',

@ -2,6 +2,7 @@ from .common import InfoExtractor
class GazetaIE(InfoExtractor): class GazetaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)' _VALID_URL = r'(?P<url>https?://(?:www\.)?gazeta\.ru/(?:[^/]+/)?video/(?:main/)*(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[A-Za-z0-9-_.]+)\.s?html)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml', 'url': 'http://www.gazeta.ru/video/main/zadaite_vopros_vladislavu_yurevichu.shtml',

@ -7,6 +7,7 @@ from ..utils import remove_start, smuggle_url, urlencode_postdata
class GDCVaultIE(InfoExtractor): class GDCVaultIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?' _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
_NETRC_MACHINE = 'gdcvault' _NETRC_MACHINE = 'gdcvault'
_TESTS = [ _TESTS = [

@ -1,93 +0,0 @@
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):
    # The last path component of a giga.de URL is used as the display id.
    _VALID_URL = r'https?://(?:www\.)?giga\.de/(?:[^/]+/)*(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.giga.de/filme/anime-awesome/trailer/anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss/',
        'md5': '6bc5535e945e724640664632055a584f',
        'info_dict': {
            'id': '2622086',
            'display_id': 'anime-awesome-chihiros-reise-ins-zauberland-das-beste-kommt-zum-schluss',
            'ext': 'mp4',
            'title': 'Anime Awesome: Chihiros Reise ins Zauberland Das Beste kommt zum Schluss',
            'description': 'md5:afdf5862241aded4718a30dff6a57baf',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 578,
            'timestamp': 1414749706,
            'upload_date': '20141031',
            'uploader': 'Robin Schweiger',
            'view_count': int,
        },
    }, {
        'url': 'http://www.giga.de/games/channel/giga-top-montag/giga-topmontag-die-besten-serien-2014/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/extra/netzkultur/videos/giga-games-tom-mats-robin-werden-eigene-wege-gehen-eine-ankuendigung/',
        'only_matching': True,
    }, {
        'url': 'http://www.giga.de/tv/jonas-liest-spieletitel-eingedeutscht-episode-2/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a giga.de video page.

        The numeric video id is scraped from the page, the syndication
        playlist JSON for that id supplies the formats, and the remaining
        metadata is scraped from the page HTML.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        video_id = self._search_regex(
            [r'data-video-id="(\d+)"', r'/api/video/jwplayer/#v=(\d+)'],
            webpage, 'video id')

        # Only the first element of the returned JSON list is used.
        playlist = self._download_json(
            'http://www.giga.de/api/syndication/video/video_id/%s/playlist.json?content=syndication/key/368b5f151da4ae05ced7fa296bdff65a/'
            % video_id, video_id)[0]

        rank = qualities(['normal', 'hd720'])

        # Entries are keyed '0', '1', '2', ...; stop at the first missing
        # (or empty) key.
        formats = []
        for index in itertools.count():
            entry = playlist.get(compat_str(index))
            if not entry:
                break
            src = entry['src']
            label = entry['quality']
            container = entry['type'].split('/')[-1]
            formats.append({
                'url': src,
                'format_id': '%s-%s' % (label, container),
                'quality': rank(entry['quality']),
            })

        # The lookups below run in the order they are written: the fatal
        # title lookup comes first, every other field is optional.
        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._html_search_meta(
                'title', webpage, 'title', fatal=True),
            'description': self._html_search_meta(
                'description', webpage, 'description'),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': parse_duration(self._search_regex(
                r'(?s)(?:data-video-id="{0}"|data-video="[^"]*/api/video/jwplayer/#v={0}[^"]*")[^>]*>.+?<span class="duration">([^<]+)</span>'.format(video_id),
                webpage, 'duration', fatal=False)),
            'timestamp': parse_iso8601(self._search_regex(
                r'datetime="([^"]+)"', webpage, 'upload date', fatal=False)),
            'uploader': self._search_regex(
                r'class="author">([^<]+)</a>', webpage, 'uploader', fatal=False),
            'view_count': str_to_int(self._search_regex(
                r'<span class="views"><strong>([\d.,]+)</strong>',
                webpage, 'view count', fatal=False)),
            'formats': formats,
        }

@ -6,6 +6,7 @@ from ..utils import (
class GodTubeIE(InfoExtractor): class GodTubeIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)' _VALID_URL = r'https?://(?:www\.)?godtube\.com/watch/\?v=(?P<id>[\da-zA-Z]+)'
_TESTS = [ _TESTS = [
{ {

@ -40,6 +40,22 @@ class GoPlayIE(InfoExtractor):
'title': 'A Family for the Holidays', 'title': 'A Family for the Holidays',
}, },
'skip': 'This video is only available for registered users' 'skip': 'This video is only available for registered users'
}, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
'ext': 'mp4',
'title': 'S11 - Aflevering 1',
'episode': 'Episode 1',
'series': 'De Mol',
'season_number': 11,
'episode_number': 1,
'season': 'Season 11'
},
'params': {
'skip_download': True
},
'skip': 'This video is only available for registered users'
}] }]
_id_token = None _id_token = None
@ -77,16 +93,39 @@ class GoPlayIE(InfoExtractor):
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
video_id, headers={'Authorization': 'Bearer %s' % self._id_token}) video_id, headers={
'Authorization': 'Bearer %s' % self._id_token,
**self.geo_verification_headers(),
})
if 'manifestUrls' in api:
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS')
formats, subs = self._extract_m3u8_formats_and_subtitles( else:
api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS') if 'ssai' not in api:
raise ExtractorError('expecting Google SSAI stream')
ssai_content_source_id = api['ssai']['contentSourceID']
ssai_video_id = api['ssai']['videoID']
dai = self._download_json(
f'https://dai.google.com/ondemand/dash/content/{ssai_content_source_id}/vid/{ssai_video_id}/streams',
video_id, data=b'{"api-key":"null"}',
headers={'content-type': 'application/json'})
periods = self._extract_mpd_periods(dai['stream_manifest'], video_id)
# skip pre-roll and mid-roll ads
periods = [p for p in periods if '-ad-' not in p['id']]
formats, subtitles = self._merge_mpd_periods(periods)
info_dict.update({ info_dict.update({
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
}) })
return info_dict return info_dict

@ -5,6 +5,7 @@ from ..utils import ExtractorError, urlencode_postdata
class HotNewHipHopIE(InfoExtractor): class HotNewHipHopIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
_TEST = { _TEST = {
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',

@ -115,11 +115,11 @@ class HotStarIE(HotStarBaseIE):
'upload_date': '20190501', 'upload_date': '20190501',
'duration': 1219, 'duration': 1219,
'channel': 'StarPlus', 'channel': 'StarPlus',
'channel_id': 3, 'channel_id': '3',
'series': 'Ek Bhram - Sarvagun Sampanna', 'series': 'Ek Bhram - Sarvagun Sampanna',
'season': 'Chapter 1', 'season': 'Chapter 1',
'season_number': 1, 'season_number': 1,
'season_id': 6771, 'season_id': '6771',
'episode': 'Janhvi Targets Suman', 'episode': 'Janhvi Targets Suman',
'episode_number': 8, 'episode_number': 8,
} }
@ -135,12 +135,12 @@ class HotStarIE(HotStarBaseIE):
'channel': 'StarPlus', 'channel': 'StarPlus',
'series': 'Anupama', 'series': 'Anupama',
'season_number': 1, 'season_number': 1,
'season_id': 7399, 'season_id': '7399',
'upload_date': '20230307', 'upload_date': '20230307',
'episode': 'Anupama, Anuj Share a Moment', 'episode': 'Anupama, Anuj Share a Moment',
'episode_number': 853, 'episode_number': 853,
'duration': 1272, 'duration': 1272,
'channel_id': 3, 'channel_id': '3',
}, },
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes 'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
}, { }, {
@ -155,12 +155,12 @@ class HotStarIE(HotStarBaseIE):
'channel': 'Hotstar Specials', 'channel': 'Hotstar Specials',
'series': 'Kana Kaanum Kaalangal', 'series': 'Kana Kaanum Kaalangal',
'season_number': 1, 'season_number': 1,
'season_id': 9441, 'season_id': '9441',
'upload_date': '20220421', 'upload_date': '20220421',
'episode': 'Back To School', 'episode': 'Back To School',
'episode_number': 1, 'episode_number': 1,
'duration': 1810, 'duration': 1810,
'channel_id': 54, 'channel_id': '54',
}, },
}, { }, {
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286', 'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
@ -325,11 +325,11 @@ class HotStarIE(HotStarBaseIE):
'formats': formats, 'formats': formats,
'subtitles': subs, 'subtitles': subs,
'channel': video_data.get('channelName'), 'channel': video_data.get('channelName'),
'channel_id': video_data.get('channelId'), 'channel_id': str_or_none(video_data.get('channelId')),
'series': video_data.get('showName'), 'series': video_data.get('showName'),
'season': video_data.get('seasonName'), 'season': video_data.get('seasonName'),
'season_number': int_or_none(video_data.get('seasonNo')), 'season_number': int_or_none(video_data.get('seasonNo')),
'season_id': video_data.get('seasonId'), 'season_id': str_or_none(video_data.get('seasonId')),
'episode': video_data.get('title'), 'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episodeNo')), 'episode_number': int_or_none(video_data.get('episodeNo')),
} }

@ -114,7 +114,6 @@ class HungamaSongIE(InfoExtractor):
'title': 'Lucky Ali - Kitni Haseen Zindagi', 'title': 'Lucky Ali - Kitni Haseen Zindagi',
'track': 'Kitni Haseen Zindagi', 'track': 'Kitni Haseen Zindagi',
'artist': 'Lucky Ali', 'artist': 'Lucky Ali',
'album': None,
'release_year': 2000, 'release_year': 2000,
'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png', 'thumbnail': 'https://stat2.hungama.ind.in/assets/images/default_images/da-200x200.png',
}, },

@ -9,7 +9,7 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '514562', 'id': '514562',
'ext': 'wav', 'ext': 'wav',
'artist': ['塞壬唱片-MSR'], 'artists': ['塞壬唱片-MSR'],
'album': 'Flame Shadow', 'album': 'Flame Shadow',
'title': 'Flame Shadow', 'title': 'Flame Shadow',
} }
@ -27,6 +27,6 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor):
'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')), 'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
'ext': 'wav', 'ext': 'wav',
'vcodec': 'none', 'vcodec': 'none',
'artist': traverse_obj(json_data, ('player', 'songDetail', 'artists')), 'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')) 'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name'))
} }

@ -617,6 +617,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
class InstagramUserIE(InstagramPlaylistBaseIE): class InstagramUserIE(InstagramPlaylistBaseIE):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])' _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
IE_DESC = 'Instagram user profile' IE_DESC = 'Instagram user profile'
IE_NAME = 'instagram:user' IE_NAME = 'instagram:user'

@ -2,6 +2,8 @@ from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor): class JeuxVideoIE(InfoExtractor):
_WORKING = False
_ENABLED = None # XXX: pass through to GenericIE
_VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
_TESTS = [{ _TESTS = [{

@ -1,66 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
join_nonempty,
traverse_obj,
unified_timestamp,
update_url_query,
)
class Kanal2IE(InfoExtractor):
    # The video id is the numeric ``id`` query parameter.
    _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
    _TESTS = [{
        'note': 'Test standard url (#5575)',
        'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
        'md5': '7ea7b16266ec1798743777df241883dd',
        'info_dict': {
            'id': '40792',
            'ext': 'mp4',
            'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': 'md5:53cabf3c5d73150d594747f727431248',
            'upload_date': '20160805',
            'timestamp': 1470420000,
        },
    }]

    def _real_extract(self, url):
        """Fetch the player playlist JSON for the id and build the info dict."""
        video_id = self._match_id(url)
        playlist = self._download_json(
            f'https://kanal2.postimees.ee/player/playlist/{video_id}',
            video_id, query={'type': 'episodes'},
            headers={'X-Requested-With': 'XMLHttpRequest'})

        # Title is "<title> / <subtitle>", skipping whichever part is empty.
        name_parts = traverse_obj(playlist, ('info', ('title', 'subtitle')))
        title = join_nonempty(*name_parts, delim=' / ')
        description = traverse_obj(playlist, ('info', 'description'))
        thumbnail = traverse_obj(playlist, ('data', 'image'))
        formats = self.get_formats(playlist, video_id)

        # The subtitle ends in "(DD.MM.YYYY HH:MM)"; a fixed +0200 offset is
        # appended before parsing.
        aired = self._search_regex(
            r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
            traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='')
        timestamp = unified_timestamp(aired + ' +0200')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'timestamp': timestamp,
        }

    def get_formats(self, playlist, video_id):
        """Return the HLS formats for every stream listed in *playlist*.

        A session is registered for the playlist's ``data.path`` first, and
        its token is appended to each stream URL as the ``s`` query parameter.

        Raises ExtractorError when the playlist has no path or the session
        registration does not return reason 'OK' together with a session.
        """
        path = traverse_obj(playlist, ('data', 'path'))
        if not path:
            raise ExtractorError('Path value not found in playlist JSON response')

        session = self._download_json(
            'https://sts.postimees.ee/session/register',
            video_id, note='Creating session', errnote='Error creating session',
            headers={
                'X-Original-URI': path,
                'Accept': 'application/json',
            })

        session_ok = session.get('reason') == 'OK' and session.get('session')
        if not session_ok:
            reason = session.get('reason', 'unknown error')
            raise ExtractorError(f'Unable to obtain session: {reason}')

        token = session['session']
        return [
            fmt
            for stream in traverse_obj(playlist, ('data', 'streams', ..., 'file'))
            for fmt in self._extract_m3u8_formats(
                update_url_query(stream, {'s': token}), video_id, 'mp4')
        ]

@ -8,6 +8,7 @@ from .common import InfoExtractor
class KankaNewsIE(InfoExtractor): class KankaNewsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml' _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
_TESTS = [{ _TESTS = [{
'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227', 'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',

@ -1,96 +0,0 @@
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
fix_xml_ampersands,
float_or_none,
xpath_with_ns,
xpath_text,
)
class KarriereVideosIE(InfoExtractor):
    # The last path component is captured as the (display) id.
    _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
        'info_dict': {
            'id': '32c91',
            'ext': 'flv',
            'title': 'AltenpflegerIn',
            'description': 'md5:dbadd1259fde2159a9b28667cb664ae2',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        # broken ampersands
        'url': 'http://www.karrierevideos.at/orientierung/vaeterkarenz-und-neue-chancen-fuer-muetter-baby-was-nun',
        'info_dict': {
            'id': '5sniu',
            'ext': 'flv',
            'title': 'Väterkarenz und neue Chancen für Mütter - "Baby - was nun?"',
            'description': 'md5:97092c6ad1fd7d38e9d6a5fdeb2bcc33',
            'thumbnail': r're:^http://.*\.png',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Build an RTMP info dict from the page and its player playlist XML."""
        slug = self._match_id(url)
        webpage = self._download_webpage(url, slug)

        # Prefer the <meta> title; fall back to the page's <h1 class="title">.
        title = self._html_search_meta('title', webpage, default=None)
        if not title:
            title = self._search_regex(
                r'<h1 class="title">([^<]+)</h1>', webpage, 'video title')

        # The id used from here on is the one found in the config XML path.
        video_id = self._search_regex(
            r'/config/video/(.+?)\.xml', webpage, 'video id')

        # Server returns malformed headers
        # Force Accept-Encoding: * to prevent gzipped results
        playlist = self._download_xml(
            'http://www.karrierevideos.at/player-playlist.xml.php?p=%s' % video_id,
            video_id, transform_source=fix_xml_ampersands,
            headers={'Accept-Encoding': '*'})

        namespaces = {
            'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'
        }
        item = playlist.find('./tracklist/item')

        def item_text(path, label, **kwargs):
            # Text of a namespaced child of the playlist item.
            return xpath_text(item, xpath_with_ns(path, namespaces), label, **kwargs)

        video_file = item_text('./jwplayer:file', 'video url', fatal=True)
        streamer = item_text('./jwplayer:streamer', 'streamer', fatal=True)
        uploader = item_text('./jwplayer:author', 'uploader')
        duration = float_or_none(item_text('./jwplayer:duration', 'duration'))

        description = self._html_search_regex(
            r'(?s)<div class="leadtext">(.+?)</div>',
            webpage, 'description')

        thumbnail = self._html_search_meta(
            'thumbnail', webpage, 'thumbnail')
        if thumbnail:
            # Resolve the thumbnail against the page URL.
            thumbnail = compat_urlparse.urljoin(url, thumbnail)

        return {
            'id': video_id,
            'url': streamer.replace('rtmpt', 'rtmp'),
            'play_path': 'mp4:%s' % video_file,
            'ext': 'flv',
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
        }

@ -3,6 +3,7 @@ from ..utils import int_or_none
class KelbyOneIE(InfoExtractor): class KelbyOneIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)' _VALID_URL = r'https?://members\.kelbyone\.com/course/(?P<id>[^$&?#/]+)'
_TESTS = [{ _TESTS = [{

@ -1,119 +0,0 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
float_or_none,
int_or_none,
url_or_none,
)
class KonserthusetPlayIE(InfoExtractor):
    # The video id is the ``m`` query parameter on either supported domain.
    _VALID_URL = r'https?://(?:www\.)?(?:konserthusetplay|rspoplay)\.se/\?.*\bm=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.konserthusetplay.se/?m=CKDDnlCY-dhWAAqiMERd-A',
        'md5': 'e3fd47bf44e864bd23c08e487abe1967',
        'info_dict': {
            'id': 'CKDDnlCY-dhWAAqiMERd-A',
            'ext': 'mp4',
            'title': 'Orkesterns instrument: Valthornen',
            'description': 'md5:f10e1f0030202020396a4d712d2fa827',
            'thumbnail': 're:^https?://.*$',
            'duration': 398.76,
        },
    }, {
        'url': 'http://rspoplay.se/?m=elWuEH34SMKvaO4wO_cHBw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the picsearch player configuration.

        The page is scanned for the picsearch ``e`` token, which is used to
        fetch the player configuration.  Formats are collected from the HLS
        URL, the RTMP bitrates and the HTTP fallback URL of the first
        playlist entry that has ``bitrates`` or ``provider`` set.

        Returns the info dict for the video.

        Raises StopIteration when no playlist entry qualifies, and KeyError
        when the configuration lacks ``media``, ``playerconfig`` or
        ``playlist``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        e = self._search_regex(
            r'https?://csp\.picsearch\.com/rest\?.*\be=(.+?)[&"\']', webpage, 'e')

        # The response wraps the JSON object in extra text; keep only the
        # outermost {...} span before parsing.
        rest = self._download_json(
            'http://csp.picsearch.com/rest?e=%s&containerId=mediaplayer&i=object' % e,
            video_id, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

        media = rest['media']
        player_config = media['playerconfig']
        playlist = player_config['playlist']

        source = next(f for f in playlist if f.get('bitrates') or f.get('provider'))

        FORMAT_ID_REGEX = r'_([^_]+)_h264m\.mp4'

        formats = []

        m3u8_url = source.get('url')
        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        fallback_url = source.get('fallbackUrl')
        fallback_format_id = None
        if fallback_url:
            fallback_format_id = self._search_regex(
                FORMAT_ID_REGEX, fallback_url, 'format id', default=None)

        connection_url = (player_config.get('rtmp', {}).get(
            'netConnectionUrl') or player_config.get(
            'plugins', {}).get('bwcheck', {}).get('netConnectionUrl'))
        if connection_url:
            # The source may have been selected for its 'provider' alone, so
            # 'bitrates' can be absent; treat that as "no RTMP variants"
            # instead of failing with KeyError.
            for bitrate in source.get('bitrates') or []:
                video_url = bitrate.get('url')
                if not video_url:
                    continue
                format_id = self._search_regex(
                    FORMAT_ID_REGEX, video_url, 'format id', default=None)
                f_common = {
                    'vbr': int_or_none(bitrate.get('bitrate')),
                    'width': int_or_none(bitrate.get('width')),
                    'height': int_or_none(bitrate.get('height')),
                }
                rtmp_format = f_common.copy()
                rtmp_format.update({
                    'url': connection_url,
                    'play_path': video_url,
                    'format_id': 'rtmp-%s' % format_id if format_id else 'rtmp',
                    'ext': 'flv',
                })
                formats.append(rtmp_format)
                # The HTTP fallback URL corresponds to exactly one bitrate;
                # attach that bitrate's dimensions to it.
                if format_id and format_id == fallback_format_id:
                    http_format = f_common.copy()
                    http_format.update({
                        'url': fallback_url,
                        'format_id': 'http-%s' % format_id,
                    })
                    formats.append(http_format)

        # Last resort: a bare fallback URL without any bitrate metadata.
        if not formats and fallback_url:
            formats.append({
                'url': fallback_url,
            })

        title = player_config.get('title') or media['title']
        description = player_config.get('mediaInfo', {}).get('description')
        thumbnail = media.get('image')
        # media['duration'] is divided by 1000 here.
        duration = float_or_none(media.get('duration'), 1000)

        subtitles = {}
        captions = source.get('captionsAvailableLanguages')
        if isinstance(captions, dict):
            for lang, subtitle_url in captions.items():
                subtitle_url = url_or_none(subtitle_url)
                if lang != 'none' and subtitle_url:
                    subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }

@ -6,6 +6,7 @@ from ..utils import (
class KooIE(InfoExtractor): class KooIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)' _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
_TESTS = [{ # Test for video in the comments _TESTS = [{ # Test for video in the comments
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde', 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',

@ -8,6 +8,7 @@ from ..utils import (
class KrasViewIE(InfoExtractor): class KrasViewIE(InfoExtractor):
_WORKING = False
IE_DESC = 'Красвью' IE_DESC = 'Красвью'
_VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)' _VALID_URL = r'https?://krasview\.ru/(?:video|embed)/(?P<id>\d+)'

@ -1,83 +0,0 @@
import random
import urllib.parse
from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
timeconvert,
update_url_query,
xpath_text,
)
class KUSIIE(InfoExtractor):
    # Either a story page (id discovered from the page) or a direct clip URL.
    _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
    _TESTS = [{
        'url': 'http://www.kusi.com/story/32849881/turko-files-refused-to-help-it-aint-right',
        'md5': '4e76ce8e53660ce9697d06c0ba6fc47d',
        'info_dict': {
            'id': '12689020',
            'ext': 'mp4',
            'title': "Turko Files: Refused to Help, It Ain't Right!",
            'duration': 223.586,
            'upload_date': '20160826',
            'timestamp': 1472233118,
            'thumbnail': r're:^https?://.*\.jpg$'
        },
    }, {
        'url': 'http://kusi.com/video?clipId=12203019',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict from the clip's feature XML on kusi.com."""
        clip_id, path = self._match_valid_url(url).group('clipId', 'path')
        video_id = clip_id or path

        webpage = self._download_webpage(url, video_id)

        # Story URLs carry no clip id; read it from the page instead.
        if clip_id is None:
            clip_id = self._html_search_regex(
                r'"clipId"\s*,\s*"(\d+)"', webpage, 'clip id')
            video_id = clip_id

        affiliate_id = self._search_regex(
            r'affiliateId\s*:\s*\'([^\']+)\'', webpage, 'affiliate id')

        # See __Packages/worldnow/model/GalleryModel.as of WNGallery.swf
        query = {
            'buildtype': 'buildfeaturexmlrequest',
            'featureType': 'Clip',
            'featureid': clip_id,
            'affiliateno': affiliate_id,
            'clientgroupid': '1',
            'rnd': int(round(random.random() * 1000000)),
        }
        xml_url = update_url_query('http://www.kusi.com/build.asp', query)

        doc = self._download_xml(xml_url, video_id)

        video_title = xpath_text(doc, 'HEADLINE', fatal=True)
        # DURATION is divided by 1000 here.
        duration = float_or_none(xpath_text(doc, 'DURATION'), scale=1000)
        description = xpath_text(doc, 'ABSTRACT')
        thumbnail = xpath_text(doc, './THUMBNAILIMAGE/FILENAME')
        creation_time = timeconvert(xpath_text(doc, 'rfc822creationdate'))

        # One format per content element of the media group.
        media_group = doc.find('{http://search.yahoo.com/mrss/}group')
        formats = [{
            'url': urllib.parse.unquote_plus(content.attrib['url']),
            'height': int_or_none(content.attrib.get('height')),
            'width': int_or_none(content.attrib.get('width')),
            'vbr': float_or_none(content.attrib.get('bitratebits'), scale=1000),
        } for content in media_group.findall('{http://search.yahoo.com/mrss/}content')]

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'duration': duration,
            'formats': formats,
            'thumbnail': thumbnail,
            'timestamp': creation_time,
        }

@ -54,6 +54,7 @@ class KuwoBaseIE(InfoExtractor):
class KuwoIE(KuwoBaseIE): class KuwoIE(KuwoBaseIE):
_WORKING = False
IE_NAME = 'kuwo:song' IE_NAME = 'kuwo:song'
IE_DESC = '酷我音乐' IE_DESC = '酷我音乐'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)'
@ -133,6 +134,7 @@ class KuwoIE(KuwoBaseIE):
class KuwoAlbumIE(InfoExtractor): class KuwoAlbumIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:album' IE_NAME = 'kuwo:album'
IE_DESC = '酷我音乐 - 专辑' IE_DESC = '酷我音乐 - 专辑'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/' _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/'
@ -169,6 +171,7 @@ class KuwoAlbumIE(InfoExtractor):
class KuwoChartIE(InfoExtractor): class KuwoChartIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:chart' IE_NAME = 'kuwo:chart'
IE_DESC = '酷我音乐 - 排行榜' IE_DESC = '酷我音乐 - 排行榜'
_VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
@ -194,6 +197,7 @@ class KuwoChartIE(InfoExtractor):
class KuwoSingerIE(InfoExtractor): class KuwoSingerIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:singer' IE_NAME = 'kuwo:singer'
IE_DESC = '酷我音乐 - 歌手' IE_DESC = '酷我音乐 - 歌手'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)'
@ -251,6 +255,7 @@ class KuwoSingerIE(InfoExtractor):
class KuwoCategoryIE(InfoExtractor): class KuwoCategoryIE(InfoExtractor):
_WORKING = False
IE_NAME = 'kuwo:category' IE_NAME = 'kuwo:category'
IE_DESC = '酷我音乐 - 分类' IE_DESC = '酷我音乐 - 分类'
_VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
@ -290,6 +295,7 @@ class KuwoCategoryIE(InfoExtractor):
class KuwoMvIE(KuwoBaseIE): class KuwoMvIE(KuwoBaseIE):
_WORKING = False
IE_NAME = 'kuwo:mv' IE_NAME = 'kuwo:mv'
IE_DESC = '酷我音乐 - MV' IE_DESC = '酷我音乐 - MV'
_VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/' _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/'

@ -231,7 +231,6 @@ class LBRYIE(LBRYBaseIE):
'release_timestamp': int, 'release_timestamp': int,
'release_date': str, 'release_date': str,
'tags': list, 'tags': list,
'duration': None,
'channel': 'RT', 'channel': 'RT',
'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66', 'channel_id': 'fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',
'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66', 'channel_url': 'https://odysee.com/@RT:fdd11cb3ab75f95efb7b3bc2d726aa13ac915b66',

@ -10,6 +10,7 @@ from ..utils import (
class Lecture2GoIE(InfoExtractor): class Lecture2GoIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)' _VALID_URL = r'https?://lecture2go\.uni-hamburg\.de/veranstaltungen/-/v/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473', 'url': 'https://lecture2go.uni-hamburg.de/veranstaltungen/-/v/17473',

@ -2,6 +2,7 @@ from .common import InfoExtractor
class LentaIE(InfoExtractor): class LentaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?lenta\.ru/[^/]+/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/', 'url': 'https://lenta.ru/news/2018/03/22/savshenko_go/',

@ -22,8 +22,6 @@ class LikeeIE(InfoExtractor):
'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4', 'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4',
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'uploader': 'Huỳnh Hồng Qu&acirc;n ', 'uploader': 'Huỳnh Hồng Qu&acirc;n ',
'play_count': int,
'download_count': int,
'artist': 'Huỳnh Hồng Qu&acirc;n ', 'artist': 'Huỳnh Hồng Qu&acirc;n ',
'timestamp': 1651571320, 'timestamp': 1651571320,
'upload_date': '20220503', 'upload_date': '20220503',
@ -44,11 +42,9 @@ class LikeeIE(InfoExtractor):
'comment_count': int, 'comment_count': int,
'like_count': int, 'like_count': int,
'uploader': 'Vương Phước Nhi', 'uploader': 'Vương Phước Nhi',
'download_count': int,
'timestamp': 1651506835, 'timestamp': 1651506835,
'upload_date': '20220502', 'upload_date': '20220502',
'duration': 60024, 'duration': 60024,
'play_count': int,
'artist': 'Vương Phước Nhi', 'artist': 'Vương Phước Nhi',
'uploader_id': '649222262', 'uploader_id': '649222262',
'view_count': int, 'view_count': int,
@ -65,9 +61,7 @@ class LikeeIE(InfoExtractor):
'duration': 9684, 'duration': 9684,
'uploader_id': 'fernanda_rivasg', 'uploader_id': 'fernanda_rivasg',
'view_count': int, 'view_count': int,
'play_count': int,
'artist': 'La Cami La✨', 'artist': 'La Cami La✨',
'download_count': int,
'like_count': int, 'like_count': int,
'uploader': 'Fernanda Rivas🎶', 'uploader': 'Fernanda Rivas🎶',
'timestamp': 1614034308, 'timestamp': 1614034308,
@ -83,13 +77,11 @@ class LikeeIE(InfoExtractor):
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'comment_count': int, 'comment_count': int,
'duration': 18014, 'duration': 18014,
'play_count': int,
'view_count': int, 'view_count': int,
'timestamp': 1611694774, 'timestamp': 1611694774,
'like_count': int, 'like_count': int,
'uploader': 'Fernanda Rivas🎶', 'uploader': 'Fernanda Rivas🎶',
'uploader_id': 'fernanda_rivasg', 'uploader_id': 'fernanda_rivasg',
'download_count': int,
'artist': 'ʟᴇʀɪᴋ_ɴɪʀɴ♡', 'artist': 'ʟᴇʀɪᴋ_ɴɪʀɴ♡',
'upload_date': '20210126', 'upload_date': '20210126',
}, },
@ -128,8 +120,6 @@ class LikeeIE(InfoExtractor):
'description': info.get('share_desc'), 'description': info.get('share_desc'),
'view_count': int_or_none(info.get('video_count')), 'view_count': int_or_none(info.get('video_count')),
'like_count': int_or_none(info.get('likeCount')), 'like_count': int_or_none(info.get('likeCount')),
'play_count': int_or_none(info.get('play_count')),
'download_count': int_or_none(info.get('download_count')),
'comment_count': int_or_none(info.get('comment_count')), 'comment_count': int_or_none(info.get('comment_count')),
'uploader': str_or_none(info.get('nick_name')), 'uploader': str_or_none(info.get('nick_name')),
'uploader_id': str_or_none(info.get('likeeId')), 'uploader_id': str_or_none(info.get('likeeId')),

@ -1,42 +0,0 @@
from .common import InfoExtractor
class LocalNews8IE(InfoExtractor):
    """Extractor for localnews8.com article pages.

    The page itself only carries identifiers; the actual media is resolved
    by handing a ``kaltura:<partner>:<entry>`` URL to the Kaltura extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
        'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
        'info_dict': {
            'id': '35183304',
            'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
            'ext': 'mp4',
            'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
            'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
            'duration': 153,
            'timestamp': 1441844822,
            'upload_date': '20150910',
            'uploader_id': 'api',
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the Kaltura entry.

        The numeric article id and the slug are taken from *url*; the Kaltura
        partner id and entry id are scraped from the downloaded page.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        partner_id = self._search_regex(
            r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
            webpage, 'partner id', group='id')

        # The third argument is the field label shown in the "Unable to
        # extract ..." error, so it must read correctly ("video id").
        kaltura_id = self._search_regex(
            r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
            webpage, 'video id', group='id')

        return {
            '_type': 'url_transparent',
            'url': f'kaltura:{partner_id}:{kaltura_id}',
            'ie_key': 'Kaltura',
            # Keep the site's own identifiers rather than Kaltura's
            'id': video_id,
            'display_id': display_id,
        }

@ -1,8 +1,7 @@
from .common import InfoExtractor from .francetv import FranceTVBaseInfoExtractor
from .francetv import FranceTVIE
class LumniIE(InfoExtractor): class LumniIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?lumni\.fr/video/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle', 'url': 'https://www.lumni.fr/video/l-homme-et-son-environnement-dans-la-revolution-industrielle',
@ -21,4 +20,4 @@ class LumniIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id') r'<div[^>]+data-factoryid\s*=\s*["\']([^"\']+)', webpage, 'video id')
return self.url_result(f'francetv:{video_id}', FranceTVIE, video_id) return self._make_url_result(video_id, url=url)

@ -1,107 +0,0 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
dict_get,
float_or_none,
int_or_none,
merge_dicts,
parse_duration,
try_get,
)
class MallTVIE(InfoExtractor):
    """Extractor for mall.tv / sk.mall.tv video pages."""

    _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'md5': 'cd69ce29176f6533b65bff69ed9a5f2a',
        'info_dict': {
            'id': 't0zzt0',
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
            'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
            'view_count': int,
            'comment_count': int,
            'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnigfq/thumbnails/retina.jpg',
            'average_rating': 9.060869565217391,
            'dislike_count': int,
            'like_count': int,
        }
    }, {
        'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
        'only_matching': True,
    }, {
        'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
        'only_matching': True,
    }, {
        'url': 'https://www.mall.tv/zivoty-slavnych/nadeje-vychodu-i-zapadu-jak-michail-gorbacov-zmenil-politickou-mapu-sveta-a-ziskal-za-to-nobelovu-cenu-miru',
        'info_dict': {
            'id': 'yx010y',
            'ext': 'mp4',
            'dislike_count': int,
            'description': 'md5:aee02bee5a8d072c6a8207b91d1905a9',
            'thumbnail': 'https://cdn.vpplayer.tech/agmipnzv/encode/vjsnjdeu/thumbnails/retina.jpg',
            'comment_count': int,
            'display_id': 'md5:0ec2afa94d2e2b7091c019cef2a43a9b',
            'like_count': int,
            'duration': 752,
            'timestamp': 1646956800,
            'title': 'md5:fe79385daaf16d74c12c1ec4a26687af',
            'view_count': int,
            'upload_date': '20220311',
            'average_rating': 9.685714285714285,
        }
    }]

    def _real_extract(self, url):
        """Scrape the embedded ``videoObject`` JSON and build the info dict.

        Page-derived fields take precedence; JSON-LD metadata found in the
        page only fills in whatever is still missing.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())

        # The player config is inlined as JSON.parse(JSON.stringify({...}))
        raw_video_object = self._search_regex(
            r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
            webpage, 'video object')
        video = self._parse_json(raw_video_object, display_id)

        video_id = self._search_regex(
            r'<input\s*id\s*=\s*player-id-name\s*[^>]+value\s*=\s*(\w+)', webpage, 'video id')

        formats = self._extract_m3u8_formats(
            video['VideoSource'], video_id, 'mp4', 'm3u8_native')

        subtitles = {}
        for track in (video.get('Subtitles') or {}):
            track_url = track.get('Url')
            if track_url:
                # Tracks without a language tag are filed under 'cz'
                lang = track.get('Language') or 'cz'
                subtitles.setdefault(lang, []).append({'url': track_url})

        entity_counts = video.get('EntityCounts') or {}

        def count_of(kind):
            # Counters live under pluralised keys ("Views", "Likes", ...)
            bucket = entity_counts.get(kind + 's') or {}
            return int_or_none(dict_get(bucket, ('Count', 'StrCount')))

        json_ld_info = self._search_json_ld(webpage, video_id, default={})

        duration = int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration'))
        average_rating = float_or_none(
            try_get(video, lambda x: x['EntityRating']['AvarageRate']))

        page_info = {
            'id': str(video_id),
            'display_id': display_id,
            'title': video.get('Title'),
            'description': clean_html(video.get('Description')),
            'thumbnail': video.get('ThumbnailUrl'),
            'formats': formats,
            'subtitles': subtitles,
            'duration': duration,
            'view_count': count_of('View'),
            'like_count': count_of('Like'),
            'dislike_count': count_of('Dislike'),
            'average_rating': average_rating,
            'comment_count': count_of('Comment'),
        }
        return merge_dicts(page_info, json_ld_info)

@ -12,6 +12,7 @@ from ..utils import (
class ManyVidsIE(InfoExtractor): class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# preview video # preview video

@ -10,6 +10,7 @@ from ..utils import (
class MarkizaIE(InfoExtractor): class MarkizaIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)' _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109', 'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
@ -68,6 +69,7 @@ class MarkizaIE(InfoExtractor):
class MarkizaPageIE(InfoExtractor): class MarkizaPageIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_' _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
_TESTS = [{ _TESTS = [{
'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni', 'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',

@ -8,15 +8,15 @@ class MegaphoneIE(InfoExtractor):
_VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)' _VALID_URL = r'https://player\.megaphone\.fm/(?P<id>[A-Z0-9]+)'
_EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})'] _EMBED_REGEX = [rf'<iframe[^>]*?\ssrc=["\'](?P<url>{_VALID_URL})']
_TEST = { _TEST = {
'url': 'https://player.megaphone.fm/GLT9749789991?"', 'url': 'https://player.megaphone.fm/GLT9749789991',
'md5': '4816a0de523eb3e972dc0dda2c191f96', 'md5': '4816a0de523eb3e972dc0dda2c191f96',
'info_dict': { 'info_dict': {
'id': 'GLT9749789991', 'id': 'GLT9749789991',
'ext': 'mp3', 'ext': 'mp3',
'title': '#97 What Kind Of Idiot Gets Phished?', 'title': '#97 What Kind Of Idiot Gets Phished?',
'thumbnail': r're:^https://.*\.png.*$', 'thumbnail': r're:^https://.*\.png.*$',
'duration': 1776.26375, 'duration': 1998.36,
'author': 'Reply All', 'creators': ['Reply All'],
}, },
} }
@ -40,7 +40,7 @@ class MegaphoneIE(InfoExtractor):
'id': video_id, 'id': video_id,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'title': title, 'title': title,
'author': author, 'creators': [author] if author else None,
'duration': episode_data['duration'], 'duration': episode_data['duration'],
'formats': formats, 'formats': formats,
} }

@ -1,36 +0,0 @@
from .common import InfoExtractor
class MiaoPaiIE(InfoExtractor):
    """Extractor for miaopai.com ``/show/`` pages."""

    _VALID_URL = r'https?://(?:www\.)?miaopai\.com/show/(?P<id>[-A-Za-z0-9~_]+)'
    _TEST = {
        'url': 'http://www.miaopai.com/show/n~0hO7sfV1nBEw4Y29-Hqg__.htm',
        'md5': '095ed3f1cd96b821add957bdc29f845b',
        'info_dict': {
            'id': 'n~0hO7sfV1nBEw4Y29-Hqg__',
            'ext': 'mp4',
            'title': '西游记音乐会的秒拍视频',
            'thumbnail': 're:^https?://.*/n~0hO7sfV1nBEw4Y29-Hqg___m.jpg',
        }
    }

    # The page is requested with an iPad user agent
    _USER_AGENT_IPAD = 'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1'

    def _real_extract(self, url):
        """Return the first HTML5 media entry of the page, with id, title and thumbnail set."""
        video_id = self._match_id(url)

        request_headers = {'User-Agent': self._USER_AGENT_IPAD}
        webpage = self._download_webpage(url, video_id, headers=request_headers)

        page_title = self._html_extract_title(webpage)
        poster_url = self._html_search_regex(
            r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
            webpage, 'thumbnail', fatal=False, group='url')

        # Only the first media entry found on the page is used
        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
        info.update(id=video_id, title=page_title, thumbnail=poster_url)
        return info

@ -1,55 +0,0 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
smuggle_url,
)
class MinistryGridIE(InfoExtractor):
    """Extractor for ministrygrid.com pages whose player sits inside a Liferay portlet."""

    _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])'
    _TEST = {
        'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers',
        'md5': '844be0d2a1340422759c2a9101bab017',
        'info_dict': {
            'id': '3453494717001',
            'ext': 'mp4',
            'title': 'The Gospel by Numbers',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20140410',
            'description': 'Coming soon from T4G 2014!',
            'uploader_id': '2034960640001',
            'timestamp': 1397145591,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['TDSLifeway'],
    }

    def _real_extract(self, url):
        """Probe each portlet listed on the page until one contains an iframe.

        Returns a URL result for the first iframe found, with the page's id
        smuggled along as ``force_videoid``.

        Raises:
            ExtractorError: if no portlet yields an iframe.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        portlets = self._parse_json(self._search_regex(
            r'Liferay\.Portlet\.list=(\[.+?\])', webpage, 'portlet list'),
            video_id)
        pl_id = self._search_regex(
            r'getPlid:function\(\){return"(\d+)"}', webpage, 'p_l_id')

        total = len(portlets)
        for position, portlet in enumerate(portlets, 1):
            portlet_url = f'http://www.ministrygrid.com/c/portal/render_portlet?p_l_id={pl_id}&p_p_id={portlet}'
            portlet_code = self._download_webpage(
                portlet_url, video_id,
                note=f'Looking in portlet {portlet} ({position}/{total})',
                fatal=False)
            # With fatal=False a failed download yields a falsy value instead
            # of raising; searching it would crash rather than let us try the
            # remaining portlets, so skip it.
            if not portlet_code:
                continue

            video_iframe_url = self._search_regex(
                r'<iframe.*?src="([^"]+)"', portlet_code, 'video iframe',
                default=None)
            if video_iframe_url:
                return self.url_result(
                    smuggle_url(video_iframe_url, {'force_videoid': video_id}),
                    video_id=video_id)

        raise ExtractorError('Could not find video iframe in any portlets')

@ -1,45 +0,0 @@
from .common import InfoExtractor
class MorningstarIE(InfoExtractor):
    """Extractor for morningstar.com video center pages."""

    IE_DESC = 'morningstar.com'
    _VALID_URL = r'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
        'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
        'info_dict': {
            'id': '615869',
            'ext': 'mp4',
            'title': 'Get Ahead of the Curve on 2013 Taxes',
            'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
            'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
        }
    }, {
        'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read title, media URL, thumbnail and description out of the page markup.

        Title and media URL are mandatory; thumbnail and description are
        looked up non-fatally.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Values are evaluated top to bottom, so the mandatory fields are
        # looked up before the optional ones.
        return {
            'id': video_id,
            'title': self._html_search_regex(
                r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title'),
            'url': self._html_search_regex(
                r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
                webpage, 'video URL'),
            'thumbnail': self._html_search_regex(
                r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
                webpage, 'thumbnail', fatal=False),
            'description': self._html_search_regex(
                r'<div id="mstarDeck".*?>(.*?)</div>',
                webpage, 'description', fatal=False),
        }

@ -5,6 +5,7 @@ from ..compat import (
class MotorsportIE(InfoExtractor): class MotorsportIE(InfoExtractor):
_WORKING = False
IE_DESC = 'motorsport.com' IE_DESC = 'motorsport.com'
_VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
_TEST = { _TEST = {

@ -451,6 +451,7 @@ class MTVVideoIE(MTVServicesInfoExtractor):
class MTVDEIE(MTVServicesInfoExtractor): class MTVDEIE(MTVServicesInfoExtractor):
_WORKING = False
IE_NAME = 'mtv.de' IE_NAME = 'mtv.de'
_VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)' _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
_TESTS = [{ _TESTS = [{

@ -9,6 +9,7 @@ from ..utils import (
class MuenchenTVIE(InfoExtractor): class MuenchenTVIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream' _VALID_URL = r'https?://(?:www\.)?muenchen\.tv/livestream'
IE_DESC = 'münchen.tv' IE_DESC = 'münchen.tv'
_TEST = { _TEST = {

@ -17,11 +17,11 @@ class MusicdexBaseIE(InfoExtractor):
'track_number': track_json.get('number'), 'track_number': track_json.get('number'),
'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'), 'url': format_field(track_json, 'url', 'https://www.musicdex.org/%s'),
'duration': track_json.get('duration'), 'duration': track_json.get('duration'),
'genre': [genre.get('name') for genre in track_json.get('genres') or []], 'genres': [genre.get('name') for genre in track_json.get('genres') or []],
'like_count': track_json.get('likes_count'), 'like_count': track_json.get('likes_count'),
'view_count': track_json.get('plays'), 'view_count': track_json.get('plays'),
'artist': [artist.get('name') for artist in track_json.get('artists') or []], 'artists': [artist.get('name') for artist in track_json.get('artists') or []],
'album_artist': [artist.get('name') for artist in album_json.get('artists') or []], 'album_artists': [artist.get('name') for artist in album_json.get('artists') or []],
'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'), 'thumbnail': format_field(album_json, 'image', 'https://www.musicdex.org/%s'),
'album': album_json.get('name'), 'album': album_json.get('name'),
'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year), 'release_year': try_get(album_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
@ -43,11 +43,11 @@ class MusicdexSongIE(MusicdexBaseIE):
'track': 'dual existence', 'track': 'dual existence',
'track_number': 1, 'track_number': 1,
'duration': 266000, 'duration': 266000,
'genre': ['Anime'], 'genres': ['Anime'],
'like_count': int, 'like_count': int,
'view_count': int, 'view_count': int,
'artist': ['fripSide'], 'artists': ['fripSide'],
'album_artist': ['fripSide'], 'album_artists': ['fripSide'],
'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png', 'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png',
'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence', 'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence',
'release_year': 2020 'release_year': 2020
@ -69,9 +69,9 @@ class MusicdexAlbumIE(MusicdexBaseIE):
'playlist_mincount': 28, 'playlist_mincount': 28,
'info_dict': { 'info_dict': {
'id': '56', 'id': '56',
'genre': ['OST'], 'genres': ['OST'],
'view_count': int, 'view_count': int,
'artist': ['TENMON & Eiichiro Yanagi / minori'], 'artists': ['TENMON & Eiichiro Yanagi / minori'],
'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~', 'title': 'ef - a tale of memories Original Soundtrack 2 ~fortissimo~',
'release_year': 2008, 'release_year': 2008,
'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg', 'thumbnail': 'https://www.musicdex.org/storage/album/2rSHkyYBYfB7sbvElpEyTMcUn6toY7AohOgJuDlE.jpg',
@ -88,9 +88,9 @@ class MusicdexAlbumIE(MusicdexBaseIE):
'id': id, 'id': id,
'title': data_json.get('name'), 'title': data_json.get('name'),
'description': data_json.get('description'), 'description': data_json.get('description'),
'genre': [genre.get('name') for genre in data_json.get('genres') or []], 'genres': [genre.get('name') for genre in data_json.get('genres') or []],
'view_count': data_json.get('plays'), 'view_count': data_json.get('plays'),
'artist': [artist.get('name') for artist in data_json.get('artists') or []], 'artists': [artist.get('name') for artist in data_json.get('artists') or []],
'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'), 'thumbnail': format_field(data_json, 'image', 'https://www.musicdex.org/%s'),
'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year), 'release_year': try_get(data_json, lambda x: date_from_str(unified_strdate(x['release_date'])).year),
'entries': entries, 'entries': entries,

@ -5,6 +5,7 @@ from ..utils import parse_duration, remove_end, unified_strdate, urljoin
class NDTVIE(InfoExtractor): class NDTVIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)' _VALID_URL = r'https?://(?:[^/]+\.)?ndtv\.com/(?:[^/]+/)*videos?/?(?:[^/]+/)*[^/?^&]+-(?P<id>\d+)'
_TESTS = [ _TESTS = [

@ -1,6 +1,7 @@
import itertools import itertools
import json import json
from .art19 import Art19IE
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -112,7 +113,8 @@ class NebulaBaseIE(InfoExtractor):
class NebulaIE(NebulaBaseIE): class NebulaIE(NebulaBaseIE):
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)' IE_NAME = 'nebula:video'
_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast', 'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
'info_dict': { 'info_dict': {
@ -236,8 +238,8 @@ class NebulaIE(NebulaBaseIE):
class NebulaClassIE(NebulaBaseIE): class NebulaClassIE(NebulaBaseIE):
IE_NAME = 'nebula:class' IE_NAME = 'nebula:media'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)' _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/copyright-for-fun-and-profit/14', 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
'info_dict': { 'info_dict': {
@ -253,6 +255,46 @@ class NebulaClassIE(NebulaBaseIE):
'title': 'Photos, Sculpture, and Video', 'title': 'Photos, Sculpture, and Video',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
'info_dict': {
'ext': 'mp3',
'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
'series_id': '335e8159-d663-491a-888f-1732285706ac',
'modified_timestamp': 1599091504,
'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
'series': 'Extremities',
'modified_date': '20200903',
'upload_date': '20200902',
'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
'release_timestamp': 1571237958,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'duration': 1546.05714,
'timestamp': 1599085608,
'release_date': '20191016',
},
}, {
'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
'info_dict': {
'ext': 'mp3',
'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'episode_number': 1,
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
'release_date': '20230304',
'modified_date': '20230403',
'series': 'The Layover',
'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
'modified_timestamp': 1680554566,
'duration': 3130.46401,
'release_timestamp': 1677943800,
'title': 'The Layover — Episode 1',
'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
'upload_date': '20230303',
'episode': 'Episode 1',
'timestamp': 1677883672,
'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -268,16 +310,38 @@ class NebulaClassIE(NebulaBaseIE):
metadata = self._call_api( metadata = self._call_api(
f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons', f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
slug, note='Fetching video metadata') slug, note='Fetching class/podcast metadata')
return { content_type = metadata.get('type')
**self._extract_video_metadata(metadata), if content_type == 'lesson':
**self._extract_formats(metadata['id'], slug), return {
} **self._extract_video_metadata(metadata),
**self._extract_formats(metadata['id'], slug),
}
elif content_type == 'podcast_episode':
episode_url = metadata['episode_url']
if not episode_url and metadata.get('premium'):
self.raise_login_required()
if Art19IE.suitable(episode_url):
return self.url_result(episode_url, Art19IE)
return traverse_obj(metadata, {
'id': ('id', {str}),
'url': ('episode_url', {url_or_none}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('published_at', {parse_iso8601}),
'duration': ('duration', {int_or_none}),
'channel_id': ('channel_id', {str}),
'chnanel': ('channel_title', {str}),
'thumbnail': ('assets', 'regular', {url_or_none}),
})
raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE): class NebulaSubscriptionsIE(NebulaBaseIE):
IE_NAME = 'nebula:subscriptions' IE_NAME = 'nebula:subscriptions'
_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)' _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/myshows', 'url': 'https://nebula.tv/myshows',
'playlist_mincount': 1, 'playlist_mincount': 1,
@ -310,7 +374,7 @@ class NebulaSubscriptionsIE(NebulaBaseIE):
class NebulaChannelIE(NebulaBaseIE): class NebulaChannelIE(NebulaBaseIE):
IE_NAME = 'nebula:channel' IE_NAME = 'nebula:channel'
_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])' _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://nebula.tv/tom-scott-presents-money', 'url': 'https://nebula.tv/tom-scott-presents-money',
'info_dict': { 'info_dict': {
@ -343,6 +407,14 @@ class NebulaChannelIE(NebulaBaseIE):
'description': 'md5:6690248223eed044a9f11cd5a24f9742', 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
}, },
'playlist_count': 23, 'playlist_count': 23,
}, {
'url': 'https://nebula.tv/trussissuespodcast',
'info_dict': {
'id': 'trussissuespodcast',
'title': 'The TLDR News Podcast',
'description': 'md5:a08c4483bc0b705881d3e0199e721385',
},
'playlist_mincount': 80,
}] }]
def _generate_playlist_entries(self, collection_id, collection_slug): def _generate_playlist_entries(self, collection_id, collection_slug):
@ -365,6 +437,17 @@ class NebulaChannelIE(NebulaBaseIE):
lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}', lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata) {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield a URL result for every episode of a podcast channel.

    Follows the ``next`` link of each API response until there is none.
    Episodes whose ``share_url`` is not a valid URL are skipped.
    """
    page_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
    page_num = 0
    while page_url:
        page_num += 1
        page = self._call_api(page_url, collection_slug, note=f'Retrieving podcast page {page_num}')
        shareable = traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url'])))
        for item in shareable:
            yield self.url_result(item['share_url'], NebulaClassIE)
        # A missing or empty "next" ends the pagination
        page_url = page.get('next')
def _real_extract(self, url): def _real_extract(self, url):
collection_slug = self._match_id(url) collection_slug = self._match_id(url)
channel = self._call_api( channel = self._call_api(
@ -373,6 +456,8 @@ class NebulaChannelIE(NebulaBaseIE):
if channel.get('type') == 'class': if channel.get('type') == 'class':
entries = self._generate_class_entries(channel) entries = self._generate_class_entries(channel)
elif channel.get('type') == 'podcast_channel':
entries = self._generate_podcast_entries(channel['id'], collection_slug)
else: else:
entries = self._generate_playlist_entries(channel['id'], collection_slug) entries = self._generate_playlist_entries(channel['id'], collection_slug)

@ -118,7 +118,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker', 'artist': 'Neko Hacker',
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0', 'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
'track_number': 1, 'track_number': 1,
'duration': None
} }
}, },
{ {
@ -136,7 +135,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker', 'artist': 'Neko Hacker',
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )', 'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
'track_number': 2, 'track_number': 2,
'duration': None
} }
}, },
{ {
@ -154,7 +152,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker', 'artist': 'Neko Hacker',
'track': '進め!むじなカンパニー (instrumental)', 'track': '進め!むじなカンパニー (instrumental)',
'track_number': 3, 'track_number': 3,
'duration': None
} }
}, },
{ {
@ -172,7 +169,6 @@ class NekoHackerIE(InfoExtractor):
'artist': 'Neko Hacker', 'artist': 'Neko Hacker',
'track': 'むじな de なじむ (instrumental)', 'track': 'むじな de なじむ (instrumental)',
'track_number': 4, 'track_number': 4,
'duration': None
} }
} }
] ]

@ -1,33 +1,38 @@
import datetime
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class NerdCubedFeedIE(InfoExtractor): class NerdCubedFeedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/feed\.json' _VALID_URL = r'https?://(?:www\.)?nerdcubed\.co\.uk/?(?:$|[#?])'
_TEST = { _TEST = {
'url': 'http://www.nerdcubed.co.uk/feed.json', 'url': 'http://www.nerdcubed.co.uk/',
'info_dict': { 'info_dict': {
'id': 'nerdcubed-feed', 'id': 'nerdcubed-feed',
'title': 'nerdcubed.co.uk feed', 'title': 'nerdcubed.co.uk feed',
}, },
'playlist_mincount': 1300, 'playlist_mincount': 5500,
} }
def _real_extract(self, url): def _extract_video(self, feed_entry):
feed = self._download_json(url, url, 'Downloading NerdCubed JSON feed') return self.url_result(
f'https://www.youtube.com/watch?v={feed_entry["id"]}', YoutubeIE,
**traverse_obj(feed_entry, {
'id': ('id', {str}),
'title': ('title', {str}),
'description': ('description', {str}),
'timestamp': ('publishedAt', {parse_iso8601}),
'channel': ('source', 'name', {str}),
'channel_id': ('source', 'id', {str}),
'channel_url': ('source', 'url', {str}),
'thumbnail': ('thumbnail', 'source', {url_or_none}),
}), url_transparent=True)
entries = [{ def _real_extract(self, url):
'_type': 'url', video_id = 'nerdcubed-feed'
'title': feed_entry['title'], feed = self._download_json('https://www.nerdcubed.co.uk/_/cdn/videos.json', video_id)
'uploader': feed_entry['source']['name'] if feed_entry['source'] else None,
'upload_date': datetime.datetime.strptime(feed_entry['date'], '%Y-%m-%d').strftime('%Y%m%d'),
'url': 'http://www.youtube.com/watch?v=' + feed_entry['youtube_id'],
} for feed_entry in feed]
return { return self.playlist_result(
'_type': 'playlist', map(self._extract_video, traverse_obj(feed, ('videos', lambda _, v: v['id']))),
'title': 'nerdcubed.co.uk feed', video_id, 'nerdcubed.co.uk feed')
'id': 'nerdcubed-feed',
'entries': entries,
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save