#!/usr/bin/env python
# standalone_searx.py: command line interface to searx with JSON output.
#
# Recovered from a whitespace-collapsed patch (commit
# 369ec017b76f65c8202581261165c9423631e1f0, Alexandre Flament, 2017-01-04)
# that creates utils/standalone_searx.py and renames
# utils/standalone_search.py to utils/google_search.py.

'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2016- by Alexandre Flament
'''

import argparse
import sys
from datetime import datetime
from json import dumps
from os.path import dirname, realpath

# Add the parent directory of this script to the import path so that the
# ``searx`` package can be imported from main().
sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))


def build_parser(categories):
    """Create the command line parser.

    :param categories: category names accepted by ``--category``.
    :return: a configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description='Standalone searx.')
    parser.add_argument('query', type=str,
                        help='Text query')
    parser.add_argument('--category', type=str, nargs='?',
                        choices=categories,
                        default='general',
                        help='Search category')
    parser.add_argument('--lang', type=str, nargs='?', default='all',
                        help='Search language')
    parser.add_argument('--pageno', type=int, nargs='?', default=1,
                        help='Page number starting from 1')
    parser.add_argument('--safesearch', type=str, nargs='?',
                        choices=['0', '1', '2'], default='0',
                        help='Safe content filter from none to strict')
    parser.add_argument('--timerange', type=str, nargs='?',
                        choices=['day', 'week', 'month', 'year'],
                        help='Filter by time range')
    return parser


def _to_text(value):
    """Decode a UTF-8 byte string to text; return any other value unchanged.

    On Python 2 command line arguments are byte strings; on Python 3 they are
    already text and have no ``decode`` method.
    """
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def no_parsed_url(results):
    """Remove the ``parsed_url`` entry from every result.

    The dictionaries are modified in place; results without a ``parsed_url``
    key are left untouched.

    :param results: iterable of result dictionaries.
    :return: the same ``results`` object.
    """
    for result in results:
        result.pop('parsed_url', None)
    return results


def json_serial(obj):
    """JSON serializer for objects not serializable by default json code.

    :param obj: object handed over by ``json.dumps`` through ``default=``.
    :return: the ISO 8601 string of ``obj`` when it is a ``datetime``.
    :raises TypeError: for any other type.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError("Type not serializable")


def build_result(search_query, result_container):
    """Assemble the dictionary that is printed as JSON.

    :param search_query: query object returned by
        ``searx.search.get_search_query_from_webapp``.
    :param result_container: container returned by ``Search.search()``.
    :return: dict echoing the search parameters next to the results.
    """
    return {
        "search": {
            "q": search_query.query,
            "pageno": search_query.pageno,
            "lang": search_query.lang,
            "safesearch": search_query.safesearch,
            "timerange": search_query.time_range,
            "engines": search_query.engines
        },
        "results": no_parsed_url(result_container.get_ordered_results()),
        "infoboxes": result_container.infoboxes,
        "suggestions": list(result_container.suggestions),
        "answers": list(result_container.answers),
        "paging": result_container.paging,
        "results_number": result_container.results_number()
    }


def main():
    """Parse the command line, run the search and print the result as JSON."""
    # searx is imported here, after the sys.path tweak at module level, so
    # that merely importing this file does not require searx.
    from searx import settings
    import searx.query
    import searx.search
    import searx.engines
    import searx.preferences

    # initialization: the engines define the categories offered on the
    # command line, so they are set up before the arguments are parsed
    searx.engines.initialize_engines(settings['engines'])
    categories = list(searx.engines.categories.keys())

    # command line parsing
    args = build_parser(categories).parse_args()

    # search results for the query
    form = {
        "q": args.query,
        "categories": _to_text(args.category),
        "pageno": str(args.pageno),
        "language": args.lang,
        "time_range": args.timerange
    }
    preferences = searx.preferences.Preferences(
        ['oscar'], categories, searx.engines.engines, [])
    preferences.key_value_settings['safesearch'].parse(args.safesearch)

    search_query = searx.search.get_search_query_from_webapp(preferences, form)
    result_container = searx.search.Search(search_query).search()

    # output
    # ``encoding`` is not passed: Python 3 rejects it and Python 2 already
    # defaults to UTF-8.
    output = dumps(build_result(search_query, result_container),
                   sort_keys=True, indent=4, ensure_ascii=False,
                   default=json_serial)
    # On Python 2 the result may be a ``unicode`` object; encode it so that
    # printing to a pipe or file does not fail on non-ASCII characters.
    if not isinstance(output, str):
        output = output.encode('utf-8')
    print(output)


if __name__ == '__main__':
    main()