From e3a3ed8a981d9395c4859b6ef56cd02bc3148db2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 1 Apr 2024 05:31:09 +0530 Subject: [PATCH] [ie, cleanup] No `from` stdlib imports in extractors (#8978) --- yt_dlp/extractor/ard.py | 4 ++-- yt_dlp/extractor/bibeltv.py | 4 ++-- yt_dlp/extractor/bundestag.py | 8 ++++---- yt_dlp/extractor/dtube.py | 4 ++-- yt_dlp/extractor/linkedin.py | 4 ++-- yt_dlp/extractor/masters.py | 1 - yt_dlp/extractor/microsoftstream.py | 4 ++-- yt_dlp/extractor/naver.py | 4 ++-- yt_dlp/extractor/neteasemusic.py | 8 ++++---- yt_dlp/extractor/niconico.py | 5 ++--- yt_dlp/extractor/panopto.py | 10 +++++----- yt_dlp/extractor/polsatgo.py | 4 ++-- yt_dlp/extractor/pr0gramm.py | 6 ++++-- yt_dlp/extractor/prosiebensat1.py | 10 +++++----- yt_dlp/extractor/radiokapital.py | 14 +++++--------- yt_dlp/extractor/rokfin.py | 4 ++-- yt_dlp/extractor/telewebion.py | 11 +++++------ yt_dlp/extractor/tenplay.py | 4 ++-- yt_dlp/extractor/wistia.py | 4 ++-- yt_dlp/extractor/zattoo.py | 4 ++-- 20 files changed, 56 insertions(+), 61 deletions(-) diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 46e68d61e..3db59c5ca 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -1,5 +1,5 @@ +import functools import re -from functools import partial from .common import InfoExtractor from ..utils import ( @@ -349,7 +349,7 @@ class ARDBetaMediathekIE(InfoExtractor): r'(?P.*)', ] - return traverse_obj(patterns, (..., {partial(re.match, string=title)}, { + return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, { 'season_number': ('season_number', {int_or_none}), 'episode_number': ('episode_number', {int_or_none}), 'episode': (( diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py index 34464daa1..666b51c56 100644 --- a/yt_dlp/extractor/bibeltv.py +++ b/yt_dlp/extractor/bibeltv.py @@ -1,4 +1,4 @@ -from functools import partial +import functools from .common import InfoExtractor from ..utils import ( @@ -50,7 +50,7 @@ class BibelTVBaseIE(InfoExtractor): **traverse_obj(data, { 'title': 'title', 'description': 'description', - 'duration': ('duration', {partial(int_or_none, scale=1000)}), + 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('schedulingStart', {parse_iso8601}), 'season_number': 'seasonNumber', 'episode_number': 'episodeNumber', diff --git a/yt_dlp/extractor/bundestag.py b/yt_dlp/extractor/bundestag.py index 9fd7c7de1..71f772665 100644 --- a/yt_dlp/extractor/bundestag.py +++ b/yt_dlp/extractor/bundestag.py @@ -1,5 +1,5 @@ +import functools import re -from functools import partial from .common import InfoExtractor from ..networking.exceptions import HTTPError @@ -115,9 +115,9 @@ class BundestagIE(InfoExtractor): note='Downloading metadata overlay', fatal=False, ), { 'title': ( - {partial(get_element_text_and_html_by_tag, 'h3')}, 0, - {partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}), - 'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}), + {functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0, + {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}), + 'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}), })) return result diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py index bb06c42be..5ea014cf0 100644 --- a/yt_dlp/extractor/dtube.py +++ b/yt_dlp/extractor/dtube.py @@ -1,5 +1,5 @@ import json -from socket import timeout +import socket from .common import InfoExtractor from ..utils import ( @@ -56,7 +56,7 @@ class DTubeIE(InfoExtractor): try: self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) self._downloader._opener.open(video_url, timeout=5).close() - except timeout: + except socket.timeout: self.to_screen( '%s: %s URL is invalid, skipping' % (video_id, format_id)) continue diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index ad41c0e20..e12f467ef 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -1,4 +1,4 @@ -from itertools import zip_longest +import itertools import re from .common import InfoExtractor @@ -156,7 +156,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): def json2srt(self, transcript_lines, duration=None): srt_data = '' - for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])): + for line, (line_dict, next_dict) in enumerate(itertools.zip_longest(transcript_lines, transcript_lines[1:])): start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption'] end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1 srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time), diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py index 716f1c961..c3c58d7d0 100644 --- a/yt_dlp/extractor/masters.py +++ b/yt_dlp/extractor/masters.py @@ -1,4 +1,3 @@ -from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 9b50996b7..5f5f16087 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -1,4 +1,4 @@ -from base64 import b64decode +import base64 from .common import InfoExtractor from ..utils import ( @@ -81,7 +81,7 @@ class MicrosoftStreamIE(InfoExtractor): 'url': thumbnail_url, } thumb_name = url_basename(thumbnail_url) - thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4))) + thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4))) thumb.update(parse_resolution(thumb_name)) thumbnails.append(thumb) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index 806b79082..885557e91 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -4,8 +4,8 @@ import hmac import itertools import json import re +import urllib.parse import time -from urllib.parse import parse_qs, urlparse from .common import InfoExtractor from ..utils import ( @@ -388,7 +388,7 @@ class NaverNowIE(NaverBaseIE): def _real_extract(self, url): show_id = self._match_id(url) - qs = parse_qs(urlparse(url).query) + qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query) if not self._yes_playlist(show_id, qs.get('shareHightlight')): return self._extract_highlight(show_id, qs['shareHightlight'][0]) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index d332b840c..73b33a9f9 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,9 +1,9 @@ +import hashlib import itertools import json +import random import re import time -from hashlib import md5 -from random import randint from .common import InfoExtractor from ..aes import aes_ecb_encrypt, pkcs7_padding @@ -34,7 +34,7 @@ class NetEaseMusicBaseIE(InfoExtractor): request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1') - msg_digest = md5(message).hexdigest() + msg_digest = hashlib.md5(message).hexdigest() data = pkcs7_padding(list(str.encode( f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}'))) @@ -53,7 +53,7 @@ class NetEaseMusicBaseIE(InfoExtractor): '__csrf': '', 'os': 'pc', 'channel': 'undefined', - 'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}', + 'requestId': f'{int(time.time() * 1000)}_{random.randint(0, 1000):04}', **traverse_obj(self._get_cookies(self._API_BASE), { 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), }) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 6a4624602..5da728fa1 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -4,8 +4,7 @@ import itertools import json import re import time - -from urllib.parse import urlparse +import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from ..networking import Request @@ -957,7 +956,7 @@ class NiconicoLiveIE(InfoExtractor): 'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9', }) - hostname = remove_start(urlparse(urlh.url).hostname, 'sp.') + hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.') latency = try_get(self._configuration_arg('latency'), lambda x: x[0]) if latency not in self._KNOWN_LATENCY: latency = 'high' diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index ddea32d70..52e703e04 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -1,8 +1,8 @@ import calendar -import json +import datetime import functools -from datetime import datetime, timezone -from random import random +import json +import random from .common import InfoExtractor from ..compat import ( @@ -243,7 +243,7 @@ class PanoptoIE(PanoptoBaseIE): invocation_id = delivery_info.get('InvocationId') stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str) if invocation_id and stream_id and duration: - timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/' + timestamp_str = f'/Date({calendar.timegm(datetime.datetime.now(datetime.timezone.utc).timetuple())}000)/' data = { 'streamRequests': [ { @@ -415,7 +415,7 @@ class PanoptoIE(PanoptoBaseIE): 'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), expected_type=lambda x: x or None), 'timestamp': session_start_time - 11640000000 if session_start_time else None, 'duration': delivery.get('Duration'), - 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}', + 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random.random()}', 'average_rating': delivery.get('AverageRating'), 'chapters': self._extract_chapters(timestamps), 'uploader': delivery.get('OwnerDisplayName') or None, diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index 1524a1fb9..1cebb365e 100644 --- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -1,5 +1,5 @@ -from uuid import uuid4 import json +import uuid from .common import InfoExtractor from ..utils import ( @@ -51,7 +51,7 @@ class PolsatGoIE(InfoExtractor): } def _call_api(self, endpoint, media_id, method, params): - rand_uuid = str(uuid4()) + rand_uuid = str(uuid.uuid4()) res = self._download_json( f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, note=f'Downloading {method} JSON metadata', diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py index 66f8a5f44..6b2f57186 100644 --- a/yt_dlp/extractor/pr0gramm.py +++ b/yt_dlp/extractor/pr0gramm.py @@ -1,5 +1,6 @@ +import datetime import json -from urllib.parse import unquote +import urllib.parse from .common import InfoExtractor from ..compat import functools @@ -114,7 +115,7 @@ class Pr0grammIE(InfoExtractor): cookies = self._get_cookies(self.BASE_URL) if 'me' not in cookies: self._download_webpage(self.BASE_URL, None, 'Refreshing verification information') - if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')): + if traverse_obj(cookies, ('me', {lambda x: x.value}, {urllib.parse.unquote}, {json.loads}, 'verified')): flags |= 0b00110 return flags @@ -196,6 +197,7 @@ class Pr0grammIE(InfoExtractor): 'like_count': ('up', {int}), 'dislike_count': ('down', {int}), 'timestamp': ('created', {int}), + 'upload_date': ('created', {int}, {datetime.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}), 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}) }), } diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py index 46e2e8a8f..4c33baec5 100644 --- a/yt_dlp/extractor/prosiebensat1.py +++ b/yt_dlp/extractor/prosiebensat1.py @@ -1,6 +1,6 @@ +import hashlib import re -from hashlib import sha1 from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -42,7 +42,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): 'Downloading protocols JSON', headers=self.geo_verification_headers(), query={ 'access_id': self._ACCESS_ID, - 'client_token': sha1((raw_ct).encode()).hexdigest(), + 'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(), 'video_id': clip_id, }, fatal=False, expected_status=(403,)) or {} error = protocols.get('error') or {} @@ -53,7 +53,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): urls = (self._download_json( self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ 'access_id': self._ACCESS_ID, - 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), + 'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), 'protocols': self._SUPPORTED_PROTOCOLS, 'server_token': server_token, 'video_id': clip_id, @@ -77,7 +77,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): if not formats: source_ids = [compat_str(source['id']) for source in video['sources']] - client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() sources = self._download_json( 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, @@ -96,7 +96,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate for source_id in source_ids: - client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() urls = self._download_json( 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, clip_id, 'Downloading urls JSON', fatal=False, query={ diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py index 8f9737ac3..5d7d3ddeb 100644 --- a/yt_dlp/extractor/radiokapital.py +++ b/yt_dlp/extractor/radiokapital.py @@ -1,18 +1,14 @@ -from .common import InfoExtractor -from ..utils import ( - clean_html, - traverse_obj, - unescapeHTML, -) - import itertools -from urllib.parse import urlencode +import urllib.parse + +from .common import InfoExtractor +from ..utils import clean_html, traverse_obj, unescapeHTML class RadioKapitalBaseIE(InfoExtractor): def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): return self._download_json( - f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', + f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urllib.parse.urlencode(qs)}', video_id, note=note) def _parse_episode(self, data): diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 5099f3ae4..56bbccde4 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -1,8 +1,8 @@ +import datetime import itertools import json import re import urllib.parse -from datetime import datetime from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( @@ -156,7 +156,7 @@ class RokfinIE(InfoExtractor): self.raise_login_required('This video is only available to premium users', True, method='cookies') elif scheduled: self.raise_no_formats( - f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}', + f'Stream is offline; scheduled for {datetime.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}', video_id=video_id, expected=True) uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username')) diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 9378ed021..5fdcddd8b 100644 --- a/yt_dlp/extractor/telewebion.py +++ b/yt_dlp/extractor/telewebion.py @@ -1,8 +1,7 @@ from __future__ import annotations - +import functools import json -from functools import partial -from textwrap import dedent +import textwrap from .common import InfoExtractor from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601 @@ -10,7 +9,7 @@ from ..utils.traversal import traverse_obj def _fmt_url(url): - return partial(format_field, template=url, default=None) + return functools.partial(format_field, template=url, default=None) class TelewebionIE(InfoExtractor): @@ -88,7 +87,7 @@ class TelewebionIE(InfoExtractor): if not video_id.startswith('0x'): video_id = hex(int(video_id)) - episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent(''' + episode_data = self._call_graphql_api('getEpisodeDetail', video_id, textwrap.dedent(''' queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) { title program { @@ -127,7 +126,7 @@ class TelewebionIE(InfoExtractor): 'formats': ( 'channel', 'descriptor', {str}, {_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')}, - {partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}), + {functools.partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}), })) info_dict['id'] = video_id return info_dict diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index a98275d86..ea4041976 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -1,7 +1,7 @@ import base64 +import datetime import functools import itertools -from datetime import datetime from .common import InfoExtractor from ..networking import HEADRequest @@ -70,7 +70,7 @@ class TenPlayIE(InfoExtractor): username, password = self._get_login_info() if username is None or password is None: self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.') - _timestamp = datetime.now().strftime('%Y%m%d000000') + _timestamp = datetime.datetime.now().strftime('%Y%m%d000000') _auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii') data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={ 'X-Network-Ten-Auth': _auth_header, diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index bce5e8326..f2256fdc6 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -1,6 +1,6 @@ +import base64 import re import urllib.parse -from base64 import b64decode from .common import InfoExtractor from ..networking import HEADRequest @@ -371,7 +371,7 @@ class WistiaChannelIE(WistiaBaseIE): webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id) data = self._parse_json( self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id), - channel_id, transform_source=lambda x: urllib.parse.unquote_plus(b64decode(x).decode('utf-8'))) + channel_id, transform_source=lambda x: urllib.parse.unquote_plus(base64.b64decode(x).decode('utf-8'))) # XXX: can there be more than one series? series = traverse_obj(data, ('series', 0), default={}) diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 6bd9ea064..5cc9c5f7a 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -1,5 +1,5 @@ import re -from uuid import uuid4 +import uuid from .common import InfoExtractor from ..compat import compat_str @@ -53,7 +53,7 @@ class ZattooPlatformBaseIE(InfoExtractor): self._request_webpage( '%s/zapi/v3/session/hello' % self._host_url(), None, 'Opening session', data=urlencode_postdata({ - 'uuid': compat_str(uuid4()), + 'uuid': compat_str(uuid.uuid4()), 'lang': 'en', 'app_version': '1.8.2', 'format': 'json',