diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 661b4f6aa..5e2829201 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -29,7 +29,6 @@ logger = logger.getChild('plugins') from searx.plugins import (oa_doi_rewrite, ahmia_filter, hash_plugin, - https_rewrite, infinite_scroll, self_info, search_on_category_select, @@ -165,7 +164,6 @@ def sha_sum(filename): plugins = PluginStore() plugins.register(oa_doi_rewrite) plugins.register(hash_plugin) -plugins.register(https_rewrite) plugins.register(infinite_scroll) plugins.register(self_info) plugins.register(search_on_category_select) diff --git a/searx/plugins/https_rewrite.py b/searx/plugins/https_rewrite.py deleted file mode 100644 index aeb42495e..000000000 --- a/searx/plugins/https_rewrite.py +++ /dev/null @@ -1,233 +0,0 @@ -''' -searx is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -searx is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with searx. If not, see < http://www.gnu.org/licenses/ >. - -(C) 2013- by Adam Tauber, -''' - -import re -from urllib.parse import urlparse -from lxml import etree -from os import listdir, environ -from os.path import isfile, isdir, join -from searx.plugins import logger -from flask_babel import gettext -from searx import searx_dir - - -name = "HTTPS rewrite" -description = gettext('Rewrite HTTP links to HTTPS if possible') -default_on = True -preference_section = 'privacy' - -if 'SEARX_HTTPS_REWRITE_PATH' in environ: - rules_path = environ['SEARX_rules_path'] -else: - rules_path = join(searx_dir, 'plugins/https_rules') - -logger = logger.getChild("https_rewrite") - -# https://gitweb.torproject.org/\ -# pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules - -# HTTPS rewrite rules -https_rules = [] - - -# load single ruleset from a xml file -def load_single_https_ruleset(rules_path): - ruleset = () - - # init parser - parser = etree.XMLParser() - - # load and parse xml-file - try: - tree = etree.parse(rules_path, parser) - except: - # TODO, error message - return () - - # get root node - root = tree.getroot() - - # check if root is a node with the name ruleset - # TODO improve parsing - if root.tag != 'ruleset': - return () - - # check if rule is deactivated by default - if root.attrib.get('default_off'): - return () - - # check if rule does only work for specific platforms - if root.attrib.get('platform'): - return () - - hosts = [] - rules = [] - exclusions = [] - - # parse childs from ruleset - for ruleset in root: - # this child define a target - if ruleset.tag == 'target': - # check if required tags available - if not ruleset.attrib.get('host'): - continue - - # convert host-rule to valid regex - host = ruleset.attrib.get('host')\ - .replace('.', r'\.').replace('*', '.*') - - # append to host list - hosts.append(host) - - # this child define a rule - elif ruleset.tag == 'rule': - # check if required tags available - if not ruleset.attrib.get('from')\ - or not ruleset.attrib.get('to'): - continue - - # TODO hack, which convert a javascript regex group - # into a valid python regex group - rule_from = ruleset.attrib['from'].replace('$', '\\') - if rule_from.endswith('\\'): - rule_from = rule_from[:-1] + '$' - rule_to = ruleset.attrib['to'].replace('$', '\\') - if rule_to.endswith('\\'): - rule_to = rule_to[:-1] + '$' - - # TODO, not working yet because of the hack above, - # currently doing that in webapp.py - # rule_from_rgx = re.compile(rule_from, re.I) - - # append rule - try: - rules.append((re.compile(rule_from, re.I | re.U), rule_to)) - except: - # TODO log regex error - continue - - # this child define an exclusion - elif ruleset.tag == 'exclusion': - # check if required tags available - if not ruleset.attrib.get('pattern'): - continue - - exclusion_rgx = re.compile(ruleset.attrib.get('pattern')) - - # append exclusion - exclusions.append(exclusion_rgx) - - # convert list of possible hosts to a simple regex - # TODO compress regex to improve performance - try: - target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U) - except: - return () - - # return ruleset - return (target_hosts, rules, exclusions) - - -# load all https rewrite rules -def load_https_rules(rules_path): - # check if directory exists - if not isdir(rules_path): - logger.error("directory not found: '" + rules_path + "'") - return - - # search all xml files which are stored in the https rule directory - xml_files = [join(rules_path, f) - for f in listdir(rules_path) - if isfile(join(rules_path, f)) and f[-4:] == '.xml'] - - # load xml-files - for ruleset_file in xml_files: - # calculate rewrite-rules - ruleset = load_single_https_ruleset(ruleset_file) - - # skip if no ruleset returned - if not ruleset: - continue - - # append ruleset - https_rules.append(ruleset) - - logger.info('{n} rules loaded'.format(n=len(https_rules))) - - -def https_url_rewrite(result): - skip_https_rewrite = False - # check if HTTPS rewrite is possible - for target, rules, exclusions in https_rules: - - # check if target regex match with url - if target.match(result['parsed_url'].netloc): - # process exclusions - for exclusion in exclusions: - # check if exclusion match with url - if exclusion.match(result['url']): - skip_https_rewrite = True - break - - # skip https rewrite if required - if skip_https_rewrite: - break - - # process rules - for rule in rules: - try: - new_result_url = rule[0].sub(rule[1], result['url']) - except: - break - - # parse new url - new_parsed_url = urlparse(new_result_url) - - # continiue if nothing was rewritten - if result['url'] == new_result_url: - continue - - # get domainname from result - # TODO, does only work correct with TLD's like - # asdf.com, not for asdf.com.de - # TODO, using publicsuffix instead of this rewrite rule - old_result_domainname = '.'.join( - result['parsed_url'].hostname.split('.')[-2:]) - new_result_domainname = '.'.join( - new_parsed_url.hostname.split('.')[-2:]) - - # check if rewritten hostname is the same, - # to protect against wrong or malicious rewrite rules - if old_result_domainname == new_result_domainname: - # set new url - result['url'] = new_result_url - - # target has matched, do not search over the other rules - break - return result - - -def on_result(request, search, result): - if 'parsed_url' not in result: - return True - - if result['parsed_url'].scheme == 'http': - https_url_rewrite(result) - return True - - -load_https_rules(rules_path) diff --git a/searx/plugins/https_rules/00README b/searx/plugins/https_rules/00README deleted file mode 100644 index fcd8a7724..000000000 --- a/searx/plugins/https_rules/00README +++ /dev/null @@ -1,17 +0,0 @@ - diff --git a/searx/plugins/https_rules/Bing.xml b/searx/plugins/https_rules/Bing.xml deleted file mode 100644 index 8b403f108..000000000 --- a/searx/plugins/https_rules/Bing.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Dailymotion.xml b/searx/plugins/https_rules/Dailymotion.xml deleted file mode 100644 index 743100cb7..000000000 --- a/searx/plugins/https_rules/Dailymotion.xml +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Deviantart.xml b/searx/plugins/https_rules/Deviantart.xml deleted file mode 100644 index 7830fc20f..000000000 --- a/searx/plugins/https_rules/Deviantart.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/DuckDuckGo.xml b/searx/plugins/https_rules/DuckDuckGo.xml deleted file mode 100644 index 173a9ad9f..000000000 --- a/searx/plugins/https_rules/DuckDuckGo.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Flickr.xml b/searx/plugins/https_rules/Flickr.xml deleted file mode 100644 index 85c6e8065..000000000 --- a/searx/plugins/https_rules/Flickr.xml +++ /dev/null @@ -1,44 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Github-Pages.xml b/searx/plugins/https_rules/Github-Pages.xml deleted file mode 100644 index d3be58a4c..000000000 --- a/searx/plugins/https_rules/Github-Pages.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - diff --git a/searx/plugins/https_rules/Github.xml b/searx/plugins/https_rules/Github.xml deleted file mode 100644 index a9a3a1e53..000000000 --- a/searx/plugins/https_rules/Github.xml +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Google-mismatches.xml b/searx/plugins/https_rules/Google-mismatches.xml deleted file mode 100644 index de9d3eb18..000000000 --- a/searx/plugins/https_rules/Google-mismatches.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Google.org.xml b/searx/plugins/https_rules/Google.org.xml deleted file mode 100644 index d6cc47881..000000000 --- a/searx/plugins/https_rules/Google.org.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/searx/plugins/https_rules/GoogleAPIs.xml b/searx/plugins/https_rules/GoogleAPIs.xml deleted file mode 100644 index 85a5a8081..000000000 --- a/searx/plugins/https_rules/GoogleAPIs.xml +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleCanada.xml b/searx/plugins/https_rules/GoogleCanada.xml deleted file mode 100644 index d5eefe816..000000000 --- a/searx/plugins/https_rules/GoogleCanada.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/searx/plugins/https_rules/GoogleImages.xml b/searx/plugins/https_rules/GoogleImages.xml deleted file mode 100644 index 0112001e0..000000000 --- a/searx/plugins/https_rules/GoogleImages.xml +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleMainSearch.xml b/searx/plugins/https_rules/GoogleMainSearch.xml deleted file mode 100644 index df504d90c..000000000 --- a/searx/plugins/https_rules/GoogleMainSearch.xml +++ /dev/null @@ -1,78 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleMaps.xml b/searx/plugins/https_rules/GoogleMaps.xml deleted file mode 100644 index 0f82c5267..000000000 --- a/searx/plugins/https_rules/GoogleMaps.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleMelange.xml b/searx/plugins/https_rules/GoogleMelange.xml deleted file mode 100644 index ec23cd45f..000000000 --- a/searx/plugins/https_rules/GoogleMelange.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - diff --git a/searx/plugins/https_rules/GoogleSearch.xml b/searx/plugins/https_rules/GoogleSearch.xml deleted file mode 100644 index 66b7ffdb0..000000000 --- a/searx/plugins/https_rules/GoogleSearch.xml +++ /dev/null @@ -1,135 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleServices.xml b/searx/plugins/https_rules/GoogleServices.xml deleted file mode 100644 index 704646b53..000000000 --- a/searx/plugins/https_rules/GoogleServices.xml +++ /dev/null @@ -1,345 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleShopping.xml b/searx/plugins/https_rules/GoogleShopping.xml deleted file mode 100644 index 6ba69a91d..000000000 --- a/searx/plugins/https_rules/GoogleShopping.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleSorry.xml b/searx/plugins/https_rules/GoogleSorry.xml deleted file mode 100644 index 72a19210d..000000000 --- a/searx/plugins/https_rules/GoogleSorry.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/searx/plugins/https_rules/GoogleTranslate.xml b/searx/plugins/https_rules/GoogleTranslate.xml deleted file mode 100644 index a004025ae..000000000 --- a/searx/plugins/https_rules/GoogleTranslate.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - diff --git a/searx/plugins/https_rules/GoogleVideos.xml b/searx/plugins/https_rules/GoogleVideos.xml deleted file mode 100644 index a5e88fcf0..000000000 --- a/searx/plugins/https_rules/GoogleVideos.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/GoogleWatchBlog.xml b/searx/plugins/https_rules/GoogleWatchBlog.xml deleted file mode 100644 index afec70c97..000000000 --- a/searx/plugins/https_rules/GoogleWatchBlog.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/searx/plugins/https_rules/Google_App_Engine.xml b/searx/plugins/https_rules/Google_App_Engine.xml deleted file mode 100644 index 851e051d1..000000000 --- a/searx/plugins/https_rules/Google_App_Engine.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/searx/plugins/https_rules/Googleplex.com.xml b/searx/plugins/https_rules/Googleplex.com.xml deleted file mode 100644 index 7ddbb5ba9..000000000 --- a/searx/plugins/https_rules/Googleplex.com.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - diff --git a/searx/plugins/https_rules/OpenStreetMap.xml b/searx/plugins/https_rules/OpenStreetMap.xml deleted file mode 100644 index 58a661823..000000000 --- a/searx/plugins/https_rules/OpenStreetMap.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Rawgithub.com.xml b/searx/plugins/https_rules/Rawgithub.com.xml deleted file mode 100644 index 3868f332a..000000000 --- a/searx/plugins/https_rules/Rawgithub.com.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - diff --git a/searx/plugins/https_rules/Soundcloud.xml b/searx/plugins/https_rules/Soundcloud.xml deleted file mode 100644 index 6958e8cbc..000000000 --- a/searx/plugins/https_rules/Soundcloud.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/ThePirateBay.xml b/searx/plugins/https_rules/ThePirateBay.xml deleted file mode 100644 index 010387b6b..000000000 --- a/searx/plugins/https_rules/ThePirateBay.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Torproject.xml b/searx/plugins/https_rules/Torproject.xml deleted file mode 100644 index 69269af7e..000000000 --- a/searx/plugins/https_rules/Torproject.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Twitter.xml b/searx/plugins/https_rules/Twitter.xml deleted file mode 100644 index 3285f44e0..000000000 --- a/searx/plugins/https_rules/Twitter.xml +++ /dev/null @@ -1,169 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Vimeo.xml b/searx/plugins/https_rules/Vimeo.xml deleted file mode 100644 index f2a3e5764..000000000 --- a/searx/plugins/https_rules/Vimeo.xml +++ /dev/null @@ -1,75 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/WikiLeaks.xml b/searx/plugins/https_rules/WikiLeaks.xml deleted file mode 100644 index 977709d2d..000000000 --- a/searx/plugins/https_rules/WikiLeaks.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/searx/plugins/https_rules/Wikimedia.xml b/searx/plugins/https_rules/Wikimedia.xml deleted file mode 100644 index 9f25831a2..000000000 --- a/searx/plugins/https_rules/Wikimedia.xml +++ /dev/null @@ -1,107 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/Yahoo.xml b/searx/plugins/https_rules/Yahoo.xml deleted file mode 100644 index 33548c4ab..000000000 --- a/searx/plugins/https_rules/Yahoo.xml +++ /dev/null @@ -1,2450 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/searx/plugins/https_rules/YouTube.xml b/searx/plugins/https_rules/YouTube.xml deleted file mode 100644 index bddc2a5f3..000000000 --- a/searx/plugins/https_rules/YouTube.xml +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -