From 1aec76a0007b1fdd30f74057f0ccb279ce334f6a Mon Sep 17 00:00:00 2001
From: zhgwn <130610452+zhgwn@users.noreply.github.com>
Date: Sat, 27 Jan 2024 18:44:13 +0100
Subject: [PATCH] Fixing manyvids

---
NOTE(review): the removed-line regexes containing `<...>` fragments were
reconstructed; verify them against base blob 741745378 before applying.
The commit and post-image hashes are those of the original submission.

 yt_dlp/extractor/manyvids.py | 166 ++++++++---------------------------
 1 file changed, 39 insertions(+), 127 deletions(-)

diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py
index 741745378..8ac24f482 100644
--- a/yt_dlp/extractor/manyvids.py
+++ b/yt_dlp/extractor/manyvids.py
@@ -1,41 +1,34 @@
-import re
-
 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
-    extract_attributes,
+    ExtractorError,
     int_or_none,
-    str_to_int,
+    traverse_obj,
     url_or_none,
-    urlencode_postdata,
 )
 
 
 class ManyVidsIE(InfoExtractor):
     _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
     _TESTS = [{
         # preview video
-        'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
-        'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
+        'url': 'https://www.manyvids.com/Video/1856601/Horny-Asian-Bunny-Needs-Cock/',
         'info_dict': {
-            'id': '133957',
+            'id': '1856601',
             'ext': 'mp4',
-            'title': 'everthing about me (Preview)',
-            'uploader': 'ellyxxix',
-            'view_count': int,
+            'title': 'Horny Asian Bunny Needs Cock',
+            'description': 'This naughty little Asian bunny is bored and horny. Feed her something hard and big.',
+            'uploader': 'nicoledoshi',
             'like_count': int,
         },
     }, {
         # full video
-        'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
-        'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
+        'url': 'https://www.manyvids.com/Video/4957054/Latex-Dress-JOI/',
         'info_dict': {
-            'id': '935718',
+            'id': '4957054',
             'ext': 'mp4',
-            'title': 'MY FACE REVEAL',
-            'description': 'md5:ec5901d41808b3746fed90face161612',
-            'uploader': 'Sarah Calanthe',
-            'view_count': int,
+            'title': 'Latex Dress JOI',
+            'description': "My new latex dress feels so good on my skin! Just wearing it for you makes me horny. Jerk your cock while I show off my latex-clad body from all angles, rubbing my hands over my body and describing how good it feels. I can't help but touch my pussy, and I give you a countdown when I'm close so we can cum together for my new latex dress!",
+            'uploader': 'Kylee Nash',
             'like_count': int,
         },
     }]
@@ -43,119 +36,38 @@ class ManyVidsIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
-        try:
-            webpage = self._download_webpage(real_url, video_id)
-        except Exception:
-            # probably useless fallback
-            webpage = self._download_webpage(url, video_id)
+        # Video metadata: title, description, uploader, likes and the free/paid flag
+        info = traverse_obj(self._download_json(
+            f'https://www.manyvids.com/bff/store/video/{video_id}',
+            video_id, note='Downloading video metadata'), ('data', {dict})) or {}
 
-        info = self._search_regex(
-            r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
-            webpage, 'meta details', default='')
-        info = extract_attributes(info)
+        # Stream locations: the full file for free videos, a teaser for paid ones
+        media = traverse_obj(self._download_json(
+            f'https://www.manyvids.com/bff/store/video/{video_id}/private',
+            video_id, note='Downloading stream information'), ('data', {dict})) or {}
 
-        player = self._search_regex(
-            r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
-            webpage, 'player details', default='')
-        player = extract_attributes(player)
+        is_free = bool(info.get('isFree'))
 
-        video_urls_and_ids = (
-            (info.get('data-meta-video'), 'video'),
-            (player.get('data-video-transcoded'), 'transcoded'),
-            (player.get('data-video-filepath'), 'filepath'),
-            (self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
-        )
-
-        def txt_or_none(s, default=None):
-            return (s.strip() or default) if isinstance(s, str) else default
-
-        uploader = txt_or_none(info.get('data-meta-author'))
-
-        def mung_title(s):
-            if uploader:
-                s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
-            return txt_or_none(s)
-
-        title = (
-            mung_title(info.get('data-meta-title'))
-            or self._html_search_regex(
-                (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
-                 r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
-                webpage, 'title', default=None)
-            or self._html_search_meta(
-                'twitter:title', webpage, 'title', fatal=True))
-
-        title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
-
-        if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
-            title += ' (Preview)'
-
-        mv_token = self._search_regex(
-            r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
-            'mv token', default=None, group='value')
-
-        if mv_token:
-            # Sets some cookies
-            self._download_webpage(
-                'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
-                video_id, note='Setting format cookies', fatal=False,
-                data=urlencode_postdata({
-                    'mvtoken': mv_token,
-                    'vid': video_id,
-                }), headers={
-                    'Referer': url,
-                    'X-Requested-With': 'XMLHttpRequest'
-                })
-
-        formats = []
-        for v_url, fmt in video_urls_and_ids:
-            v_url = url_or_none(v_url)
-            if not v_url:
-                continue
-            if determine_ext(v_url) == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
-                    v_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls'))
-            else:
-                formats.append({
-                    'url': v_url,
-                    'format_id': fmt,
-                })
-
-        self._remove_duplicate_formats(formats)
-
-        for f in formats:
-            if f.get('height') is None:
-                f['height'] = int_or_none(
-                    self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
-            if '/preview/' in f['url']:
-                f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
-                f['preference'] = -10
-            if 'transcoded' in f['format_id']:
-                f['preference'] = f.get('preference', -1) - 1
-
-        def get_likes():
-            likes = self._search_regex(
-                r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
-                webpage, 'likes', default='')
-            likes = extract_attributes(likes)
-            return int_or_none(likes.get('data-likes'))
-
-        def get_views():
-            return str_to_int(self._html_search_regex(
-                r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
-                webpage, 'view count', default=None))
+        title = info.get('title')
+        if title and not is_free:
+            title += ' (Preview)'
+
+        if is_free:
+            video_url = url_or_none(media.get('filepath'))
+        else:
+            video_url = url_or_none(traverse_obj(media, ('teaser', 'filepath')))
+        if not video_url:
+            # Paid videos without a teaser are rare, but they exist; examples are listed at
+            # https://www.manyvids.com/Vids/?content_type=1,2,3&other=blocked&search_type=0&sort=13&page=1
+            raise ExtractorError(
+                'Unable to extract video URL' if is_free else 'This video has no preview',
+                expected=not is_free)
 
         return {
             'id': video_id,
+            'url': video_url,
             'title': title,
-            'formats': formats,
-            'description': txt_or_none(info.get('data-meta-description')),
-            'uploader': txt_or_none(info.get('data-meta-author')),
-            'thumbnail': (
-                url_or_none(info.get('data-meta-image'))
-                or url_or_none(player.get('data-video-screenshot'))),
-            'view_count': get_views(),
-            'like_count': get_likes(),
+            'description': info.get('description') or None,
+            'uploader': traverse_obj(info, ('model', 'displayName', {str})),
+            'like_count': int_or_none(info.get('likes')),
         }