From e7870111e83033e0ac728d5a2d565d1eb146c335 Mon Sep 17 00:00:00 2001 From: David <59258980+zerodytrash@users.noreply.github.com> Date: Tue, 29 Mar 2022 03:05:31 -0700 Subject: [PATCH] [YouTube] Add new age-gate bypass (#3233) Closes #3182 Authored by: zerodytrash, pukkandan --- README.md | 2 +- yt_dlp/extractor/youtube.py | 60 ++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 955a98557..ab729fa4c 100644 --- a/README.md +++ b/README.md @@ -1657,7 +1657,7 @@ The following extractors use this feature: #### youtube * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and auto-translated subtitles respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (Eg: `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but tv_embedded and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats even without `--live-from-start` (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e5097c264..19b4985f6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -217,15 +217,35 @@ INNERTUBE_CLIENTS = { } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 - } + }, + # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) + # See: https://github.com/zerodytrash/YouTube-Internal-Clients + 'tv_embedded': { + 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', + 'clientVersion': '2.0', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85 + }, } +def _split_innertube_client(client_name): + variant, *base = client_name.rsplit('.', 1) + if base: + return variant, base[0], variant + base, *variant = client_name.split('_', 1) + return client_name, base, variant[0] if variant else None + + def build_innertube_clients(): THIRD_PARTY = { - 'embedUrl': 'https://google.com', # Can be any valid URL + 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - BASE_CLIENTS = ('android', 'web', 'ios', 'mweb') + BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): @@ -234,15 +254,15 @@ def build_innertube_clients(): ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') - base_client, *variant = client.split('_') + _, base_client, variant = _split_innertube_client(client) ytcfg['priority'] = 10 * priority(base_client) if not variant: - INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) - agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' - agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY - agegate_ytcfg['priority'] -= 1 - elif variant == ['embedded']: + INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg) + embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' + embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY + embedscreen['priority'] -= 3 + elif variant == 'embedded': ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY ytcfg['priority'] -= 2 else: @@ -2956,13 +2976,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): prs = [] def append_client(*client_names): - """ Append the first client name that exists """ + """ Append the first client name that exists but not already used """ for client_name in client_names: - if client_name in INNERTUBE_CLIENTS: - if client_name not in all_clients: + actual_client = _split_innertube_client(client_name)[0] + if actual_client in INNERTUBE_CLIENTS: + if actual_client not in all_clients: clients.append(client_name) - all_clients.add(client_name) - return + all_clients.add(actual_client) + return # Android player_response does not have microFormats which are needed for # extraction of some data. So we return the initial_pr with formats @@ -2977,7 +2998,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): tried_iframe_fallback = False player_url = None while clients: - client = clients.pop() + client, base_client, variant = _split_innertube_client(clients.pop()) player_ytcfg = master_ytcfg if client == 'web' else {} if 'configs' not in self._configuration_arg('player_skip'): player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg @@ -3005,10 +3026,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): prs.append(pr) # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in - if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated: - append_client(client.replace('_agegate', '_creator')) + if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: + append_client(f'{base_client}_creator') elif self._is_agegated(pr): - append_client(f'{client}_embedded', f'{client.replace("_embedded", "")}_agegate') + if variant == 'tv_embedded': + append_client(f'{base_client}_embedded') + elif not variant: + append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') if last_error: if not len(prs):