From 49b4ceaedf92db85177cfa10542bddbed16529c7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Aug 2022 03:20:36 +0530 Subject: [PATCH] [jsinterp] Bring on-par with youtube-dl Partially cherry-picked from: https://github.com/ytdl-org/youtube-dl/commit/d231b56717c73ee597d2e077d11b69ed48a1b02d Authored by pukkandan, dirkf --- README.md | 2 +- test/test_jsinterp.py | 30 +++++++++++++++++++ test/test_youtube_signature.py | 1 + yt_dlp/jsinterp.py | 54 ++++++++++++++++++++++------------ 4 files changed, 67 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 9672a1771..42cbfceba 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ # NEW FEATURES -* Merged with **youtube-dl v2021.12.17+ [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294177265ba35b45746dbb600965076ed150)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Merged with **youtube-dl v2021.12.17+ [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56717c73ee597d2e077d11b69ed48a1b02d)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 48e2abcf6..c97f6dcfb 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -48,6 +48,9 @@ def test_operators(self): jsi = 
JSInterpreter('function f(){return 1 << 5;}') self.assertEqual(jsi.call_function('f'), 32) + jsi = JSInterpreter('function f(){return 2 ** 5}') + self.assertEqual(jsi.call_function('f'), 32) + jsi = JSInterpreter('function f(){return 19 & 21;}') self.assertEqual(jsi.call_function('f'), 17) @@ -57,6 +60,12 @@ def test_operators(self): jsi = JSInterpreter('function f(){return []? 2+3: 4;}') self.assertEqual(jsi.call_function('f'), 5) + jsi = JSInterpreter('function f(){return 1 == 2}') + self.assertEqual(jsi.call_function('f'), False) + + jsi = JSInterpreter('function f(){return 0 && 1 || 2;}') + self.assertEqual(jsi.call_function('f'), 2) + def test_array_access(self): jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}') self.assertEqual(jsi.call_function('f'), [5, 2, 7]) @@ -114,6 +123,16 @@ def test_precedence(self): }''') self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + def test_builtins(self): + jsi = JSInterpreter(''' + function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } + ''') + self.assertEqual(jsi.call_function('x'), 86000) + jsi = JSInterpreter(''' + function x(dt) { return new Date(dt) - 0; } + ''') + self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000) + def test_call(self): jsi = JSInterpreter(''' function x() { return 2; } @@ -188,6 +207,17 @@ def test_comma(self): ''') self.assertEqual(jsi.call_function('x'), 7) + jsi = JSInterpreter(''' + function x() { a=5; return (a -= 1, a+=3, a); } + ''') + self.assertEqual(jsi.call_function('x'), 7) + + def test_void(self): + jsi = JSInterpreter(''' + function x() { return void 42; } + ''') + self.assertEqual(jsi.call_function('x'), None) + def test_return_function(self): jsi = JSInterpreter(''' function x() { return [1, function(){return 1}][1] } diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 559bdfccf..79bbfc323 100644 --- 
a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -109,6 +109,7 @@ class TestPlayerInfo(unittest.TestCase): def test_youtube_extract_player_info(self): PLAYER_URLS = ( + ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'), diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 1af6ee0aa..87f141476 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -17,6 +17,8 @@ ) _NAME_RE = r'[a-zA-Z_$][\w$]*' + +# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence _OPERATORS = { # None => Defined in JSInterpreter._operator '?': None, @@ -26,23 +28,31 @@ '|': operator.or_, '^': operator.xor, - # FIXME: This should actually be below comparision - '>>': operator.rshift, - '<<': operator.lshift, + '===': operator.is_, + '!==': operator.is_not, + '==': operator.eq, + '!=': operator.ne, '<=': operator.le, '>=': operator.ge, '<': operator.lt, '>': operator.gt, + '>>': operator.rshift, + '<<': operator.lshift, + '+': operator.add, '-': operator.sub, '*': operator.mul, '/': operator.truediv, '%': operator.mod, + + '**': operator.pow, } +_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'} + _MATCHING_PARENS = dict(zip('({[', ')}]')) _QUOTES = '\'"' @@ -81,7 +91,7 @@ def __delitem__(self, key): class Debugger: import sys - ENABLED = 'pytest' in sys.modules + ENABLED = False and 'pytest' in sys.modules @staticmethod def write(*args, level=100): @@ -200,7 +210,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): if should_return: return ret, should_return - m = re.match(r'(?P<var>var\s)|return(?:\s+|$)', stmt) + m = 
re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|$)', stmt) if m: expr = stmt[len(m.group(0)):].strip() should_return = not m.group('var') @@ -218,13 +228,18 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): obj = expr[4:] if obj.startswith('Date('): left, right = self._separate_at_paren(obj[4:], ')') - expr = unified_timestamp(left[1:-1], False) + expr = unified_timestamp( + self.interpret_expression(left, local_vars, allow_recursion), False) if not expr: raise self.Exception(f'Failed to parse date {left!r}', expr) expr = self._dump(int(expr * 1000), local_vars) + right else: raise self.Exception(f'Unsupported object {obj}', expr) + if expr.startswith('void '): + left = self.interpret_expression(expr[5:], local_vars, allow_recursion) + return None, should_return + if expr.startswith('{'): inner, outer = self._separate_at_paren(expr, '}') inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion) @@ -307,7 +322,8 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): if default: matched = matched or case == 'default' elif not matched: - matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion) + matched = (case != 'default' + and switch_val == self.interpret_expression(case, local_vars, allow_recursion)) if not matched: continue try: @@ -347,7 +363,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): m = re.match(fr'''(?x) (?P<assign> (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s* - (?P<op>{"|".join(map(re.escape, _OPERATORS))})? + (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})? 
=(?P<expr>.*)$ )|(?P<return> (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$ @@ -397,12 +413,14 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100): for op in _OPERATORS: separated = list(self._separate(expr, op)) - if len(separated) < 2: - continue right_expr = separated.pop() - while op == '-' and len(separated) > 1 and not separated[-1].strip(): - right_expr = f'-{right_expr}' + while op in '<>*-' and len(separated) > 1 and not separated[-1].strip(): separated.pop() + right_expr = f'{op}{right_expr}' + if op != '-': + right_expr = f'{separated.pop()}{op}{right_expr}' + if not separated: + continue left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion) return self._operator(op, 0 if left_val is None else left_val, right_expr, expr, local_vars, allow_recursion), should_return @@ -564,8 +582,8 @@ def extract_object(self, objname): # Currently, it only supports function definitions fields_m = re.finditer( r'''(?x) - (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)} - ''' % _FUNC_NAME_RE, + (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)} + ''' % (_FUNC_NAME_RE, _NAME_RE), fields) for f in fields_m: argnames = f.group('args').split(',') @@ -580,7 +598,7 @@ def extract_function_code(self, funcname): (?: function\s+%(name)s| [{;,]\s*%(name)s\s*=\s*function| - var\s+%(name)s\s*=\s*function + (?:var|const|let)\s+%(name)s\s*=\s*function )\s* \((?P<args>[^)]*)\)\s* (?P<code>{.+})''' % {'name': re.escape(funcname)}, @@ -615,10 +633,8 @@ def build_function(self, argnames, code, *global_stack): argnames = tuple(argnames) def resf(args, kwargs={}, allow_recursion=100): - global_stack[0].update({ - **dict(itertools.zip_longest(argnames, args, fillvalue=None)), - **kwargs - }) + global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None)) + global_stack[0].update(kwargs) var_stack = LocalNameSpace(*global_stack) ret, should_abort = 
self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1) if 
should_abort: