From 7b53b6bfeff1670c1c5d3dd299b7ed6162a5e3c0 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Wed, 4 Sep 2019 15:47:27 -0400 Subject: [PATCH] Shrink continuation cursor for YouTube comments --- src/invidious/channels.cr | 2 +- src/invidious/comments.cr | 48 ++++++++++++++++++++++++++++------ src/invidious/helpers/utils.cr | 9 ++++--- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index 107039a6..00eac902 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -461,7 +461,7 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = " case sort_by when "newest" # Empty tags can be omitted - # meta.write(Bytes[0x18,0x00]) + # data.write(Bytes[0x18,0x00]) when "popular" data.write Bytes[0x18, 0x01] when "oldest" diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr index e060fe46..04ba6f5d 100644 --- a/src/invidious/comments.cr +++ b/src/invidious/comments.cr @@ -57,14 +57,22 @@ class RedditListing }) end -def fetch_youtube_comments(id, db, continuation, format, locale, thin_mode, region, sort_by = "top") +def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, sort_by = "top") video = get_video(id, db, region: region) session_token = video.info["session_token"]? - ctoken = produce_comment_continuation(id, cursor: "", sort_by: sort_by) - continuation ||= ctoken + case cursor + when nil, "" + ctoken = produce_comment_continuation(id, cursor: "", sort_by: sort_by) + # when .starts_with? "Ug" + # ctoken = produce_comment_reply_continuation(id, video.ucid, cursor) + when .starts_with? "ADSJ" + ctoken = produce_comment_continuation(id, cursor: cursor, sort_by: sort_by) + else + ctoken = cursor + end - if !continuation || continuation.empty? || !session_token + if !session_token if format == "json" return {"comments" => [] of String}.to_json else @@ -73,6 +81,7 @@ def fetch_youtube_comments(id, db, continuation, format, locale, thin_mode, regi end post_req = { + page_token: ctoken, session_token: session_token, } @@ -89,7 +98,7 @@ def fetch_youtube_comments(id, db, continuation, format, locale, thin_mode, regi headers["x-youtube-client-name"] = "1" headers["x-youtube-client-version"] = "2.20180719" - response = client.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req) + response = client.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req) response = JSON.parse(response.body) if !response["response"]["continuationContents"]? @@ -216,8 +225,8 @@ def fetch_youtube_comments(id, db, continuation, format, locale, thin_mode, regi end if body["continuations"]? - continuation = body["continuations"][0]["nextContinuationData"]["continuation"] - json.field "continuation", continuation + continuation = body["continuations"][0]["nextContinuationData"]["continuation"].as_s + json.field "continuation", cursor.try &.starts_with?("E") ? continuation : extract_comment_cursor(continuation) end end end @@ -563,6 +572,29 @@ def content_to_comment_html(content) return comment_html end +def extract_comment_cursor(continuation) + continuation = URI.unescape(continuation) + data = IO::Memory.new(Base64.decode(continuation)) + + # 0x12 0x26 + data.pos += 2 + + data.read_byte # => 0x12 + video_id = Bytes.new(data.read_bytes(VarInt)) + data.read video_id + + until data.peek[0] == 0x0a + data.read_byte + end + data.read_byte # 0x0a + data.read_byte if data.peek[0] == 0x0a + + cursor = Bytes.new(data.read_bytes(VarInt)) + data.read cursor + + String.new(cursor) +end + def produce_comment_continuation(video_id, cursor = "", sort_by = "top") data = IO::Memory.new @@ -652,7 +684,7 @@ def produce_comment_reply_continuation(video_id, ucid, comment_id) VarInt.to_io(data, comment_id.size) data.print comment_id - data.write(Bytes[0x22, 0x02, 0x08, 0x00]) # ?? + data.write(Bytes[0x22, 0x02, 0x08, 0x00]) # ? data.write(Bytes[ucid.size + video_id.size + 7]) data.write(Bytes[ucid.size]) diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index b39f65c5..5a813486 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -267,8 +267,8 @@ def get_referer(env, fallback = "/", unroll = true) end struct VarInt - def self.from_io(io : IO, format = IO::ByteFormat::BigEndian) : Int32 - result = 0_i32 + def self.from_io(io : IO, format = IO::ByteFormat::NetworkEndian) : Int32 + result = 0_u32 num_read = 0 loop do @@ -276,18 +276,19 @@ struct VarInt raise "Invalid VarInt" if !byte value = byte & 0x7f - result |= value.to_i32 << (7 * num_read) + result |= value.to_u32 << (7 * num_read) num_read += 1 break if byte & 0x80 == 0 raise "Invalid VarInt" if num_read > 5 end - result + result.to_i32 end def self.to_io(io : IO, value : Int32) io.write_byte 0x00 if value == 0x00 + value = value.to_u32 while value != 0 byte = (value & 0x7f).to_u8