From 73699adfa3f24d95f8851ce61a6fe3f83c0117bd Mon Sep 17 00:00:00 2001 From: magnolia1234 <7676006-magnolia1234@users.noreply.gitlab.com> Date: Tue, 2 Jan 2024 19:44:11 +0100 Subject: [PATCH] Fix Haaretz Group (Semrushbot) --- background.js | 20 ++++++++++++++++++-- changelog.txt | 2 +- custom/manifest.json | 2 +- manifest.json | 2 +- sites.js | 1 + 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/background.js b/background.js index e444de59..d44c73b5 100644 --- a/background.js +++ b/background.js @@ -60,7 +60,7 @@ var remove_cookies = []; var remove_cookies_select_hold, remove_cookies_select_drop; // Set User-Agent -var use_google_bot, use_bing_bot, use_facebook_bot; +var use_google_bot, use_bing_bot, use_facebook_bot, use_semrush_bot; // Set Referer var use_drudgereport_referer, use_facebook_referer, use_google_referer, use_twitter_referer; // Set random IP-address @@ -107,6 +107,7 @@ function initSetRules() { use_google_bot = []; use_bing_bot = []; use_facebook_bot = []; + use_semrush_bot = []; use_drudgereport_referer = []; use_facebook_referer = []; use_google_referer = []; @@ -140,6 +141,8 @@ const userAgentMobileB = "Chrome/115.0.5790.171 Mobile Safari/537.36 (compatible const userAgentDesktopF = 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'; +const userAgentDesktopS = "Mozilla/5.0 (compatible; SemrushBot; +http://www.semrush.com/bot.html)"; + var enabledSites = []; var disabledSites = []; var optionSites = {}; @@ -260,6 +263,10 @@ function addRules(domain, rule) { if (!use_facebook_bot.includes(domain)) use_facebook_bot.push(domain); break; + case 'semrushbot': + if (!use_semrush_bot.includes(domain)) + use_semrush_bot.push(domain); + break; } } if (rule.referer) { @@ -414,7 +421,7 @@ function set_rules(sites, sites_updated, sites_custom) { blockedJsInlineDomains = Object.keys(blockedJsInline); disableJavascriptInline(); use_random_ip = Object.keys(random_ip); - change_headers = use_google_bot.concat(use_bing_bot, use_facebook_bot, use_drudgereport_referer, use_facebook_referer, use_google_referer, use_twitter_referer, use_random_ip); + change_headers = use_google_bot.concat(use_bing_bot, use_facebook_bot, use_semrush_bot, use_drudgereport_referer, use_facebook_referer, use_google_referer, use_twitter_referer, use_random_ip); } // add grouped sites to en/disabledSites (and exclude sites) @@ -1042,6 +1049,7 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail !(matchUrlDomain('www.wsj.com', details.url)); var bingbotEnabled = matchUrlDomain(use_bing_bot, details.url); var facebookbotEnabled = matchUrlDomain(use_facebook_bot, details.url); + var semrushbotEnabled = matchUrlDomain(use_semrush_bot, details.url); // if referer exists, set it requestHeaders = requestHeaders.map(function (requestHeader) { @@ -1116,6 +1124,14 @@ if (matchUrlDomain(change_headers, details.url) && !ignore_types.includes(detail }) } + // override User-Agent to use Semrushbot + if (semrushbotEnabled) { + requestHeaders.push({ + "name": "User-Agent", + "value": userAgentDesktopS + }) + } + // random IP for sites in use_random_ip let domain_random; if (domain_random = matchUrlDomain(use_random_ip, details.url)) { diff --git a/changelog.txt b/changelog.txt index cb274db8..23811393 100644 --- a/changelog.txt +++ b/changelog.txt @@ -3,7 +3,7 @@ Changelog Bypass Paywalls Clean - Firefox Updates (install signed xpi-file): https://gitlab.com/magnolia1234/bypass-paywalls-firefox-clean/-/releases Post-release -Fix Haaretz.co.il (fetch from archive.is) +Fix Haaretz Group (Semrushbot) Fix The Diplomat (magazine) * v3.4.9.0 (2023-12-31) diff --git a/custom/manifest.json b/custom/manifest.json index c40c7774..508e7c89 100644 --- a/custom/manifest.json +++ b/custom/manifest.json @@ -51,5 +51,5 @@ "webRequestBlocking", "*://*/*" ], - "version": "3.4.9.2" + "version": "3.4.9.3" } diff --git a/manifest.json b/manifest.json index 881e5a9a..f49f1365 100644 --- a/manifest.json +++ b/manifest.json @@ -827,5 +827,5 @@ "*://archive.vn/*", "*://webcache.googleusercontent.com/*" ], - "version": "3.4.9.2" + "version": "3.4.9.3" } diff --git a/sites.js b/sites.js index 38f6efa1..b34007e8 100644 --- a/sites.js +++ b/sites.js @@ -1053,6 +1053,7 @@ var defaultSites = { "themarker.com" ], allow_cookies: 1, + useragent: "semrushbot", cs_dompurify: 1 }, "Haas Mediengruppe (opt-in to custom sites)": {