From 324aa96062cf8fe27504521989e12349ee917e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sat, 27 Mar 2021 15:54:33 +0100 Subject: [PATCH 1/3] [enh] add PostgreSQL engine Slightly modified merge of [22079ff] from searx. [22079ff] https://github.com/searx/searx/commit/22079ffdefb07cf69486a232e823797e737014e9 Signed-off-by: Markus Heiser --- searx/engines/postgresql.py | 69 +++++++++++++++++++++++++++++++++++++ searx/settings.yml | 10 ++++++ 2 files changed, 79 insertions(+) create mode 100644 searx/engines/postgresql.py diff --git a/searx/engines/postgresql.py b/searx/engines/postgresql.py new file mode 100644 index 000000000..eb4e9b0b4 --- /dev/null +++ b/searx/engines/postgresql.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-function-docstring +"""PostgreSQL database (offline) + +""" + +# import error is ignored because the admin has to install psycopg2 manually to use +# the engine +import psycopg2 # pylint: disable=import-error + +engine_type = 'offline' +host = "127.0.0.1" +port = "5432" +database = "" +username = "" +password = "" +query_str = "" +limit = 10 +paging = True +result_template = 'key-value.html' +_connection = None + +def init(engine_settings): + global _connection # pylint: disable=global-statement + + if 'query_str' not in engine_settings: + raise ValueError('query_str cannot be empty') + + if not engine_settings['query_str'].lower().startswith('select '): + raise ValueError('only SELECT query is supported') + + _connection = psycopg2.connect( + database = database, + user = username, + password = password, + host = host, + port = port, + ) + +def search(query, params): + query_params = {'query': query} + query_to_run = ( + query_str + + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) + ) + + with _connection: + with _connection.cursor() as cur: + cur.execute(query_to_run, query_params) + return _fetch_results(cur) + +def 
_fetch_results(cur): + results = [] + titles = [] + + try: + titles = [column_desc.name for column_desc in cur.description] + + for res in cur: + result = dict(zip(titles, map(str, res))) + result['template'] = result_template + results.append(result) + + # no results to fetch + except psycopg2.ProgrammingError: + pass + + return results diff --git a/searx/settings.yml b/searx/settings.yml index 1675bcca7..c706c853c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -854,6 +854,16 @@ engines: url: https://thepiratebay.org/ timeout : 3.0 +# Required dependency: psycopg2 +# - name : postgresql +# engine : postgresql +# database : postgres +# username : postgres +# password : postgres +# limit : 10 +# query_str : 'SELECT * from my_table WHERE my_column = %(query)s' +# shortcut : psql + - name : pubmed engine : pubmed shortcut : pub From 87a01a173663051b542b45c8092110c4877dc8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sun, 28 Mar 2021 21:31:14 +0200 Subject: [PATCH 2/3] [enh] add MySQL engine Slightly modified merge of [c00a33fe] from searx. 
[c00a33fe] https://github.com/searx/searx/commit/c00a33feee0886dc2b42f4c7b43731210597e5b7 Signed-off-by: Markus Heiser --- searx/engines/mysql_server.py | 57 +++++++++++++++++++++++++++++++++++ searx/settings.yml | 10 ++++++ 2 files changed, 67 insertions(+) create mode 100644 searx/engines/mysql_server.py diff --git a/searx/engines/mysql_server.py b/searx/engines/mysql_server.py new file mode 100644 index 000000000..42b436f5e --- /dev/null +++ b/searx/engines/mysql_server.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +# pylint: disable=missing-function-docstring +"""MySQL database (offline) + +""" + +# import error is ignored because the admin has to install mysql manually to use +# the engine +import mysql.connector # pylint: disable=import-error + +engine_type = 'offline' +auth_plugin = 'caching_sha2_password' +host = "127.0.0.1" +database = "" +username = "" +password = "" +query_str = "" +limit = 10 +paging = True +result_template = 'key-value.html' +_connection = None + +def init(engine_settings): + global _connection # pylint: disable=global-statement + + if 'query_str' not in engine_settings: + raise ValueError('query_str cannot be empty') + + if not engine_settings['query_str'].lower().startswith('select '): + raise ValueError('only SELECT query is supported') + + _connection = mysql.connector.connect( + database = database, + user = username, + password = password, + host = host, + auth_plugin=auth_plugin, + ) + +def search(query, params): + query_params = {'query': query} + query_to_run = query_str + ' LIMIT {0} OFFSET {1}'.format(limit, (params['pageno'] - 1) * limit) + + with _connection.cursor() as cur: + cur.execute(query_to_run, query_params) + + return _fetch_results(cur) + +def _fetch_results(cur): + results = [] + for res in cur: + result = dict(zip(cur.column_names, map(str, res))) + result['template'] = result_template + results.append(result) + + return results diff --git a/searx/settings.yml 
b/searx/settings.yml index c706c853c..7b9c5968e 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1225,6 +1225,16 @@ engines: # See : http://mymemory.translated.net/doc/usagelimits.php # api_key : '' +# Required dependency: mysql-connector-python +# - name : mysql +# engine : mysql_server +# database : mydatabase +# username : user +# password : pass +# limit : 10 +# query_str : 'SELECT * from mytable WHERE fieldname=%(query)s' +# shortcut : mysql + - name : 1337x engine : 1337x shortcut : 1337x From e7a4d0961b7e48ee5c3a770cb495a0bb943464ae Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Fri, 28 May 2021 19:40:29 +0200 Subject: [PATCH 3/3] [doc] add blog post about SQL servers Modified merge of [22a79a4] from searx. [22a79a4] https://github.com/searx/searx/commit/22a79a48961d07d8b7c47cebc3f51a57a5ef5cfc Signed-off-by: Markus Heiser --- docs/blog/index.rst | 1 + docs/blog/sql-engines.rst | 160 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+) create mode 100644 docs/blog/sql-engines.rst diff --git a/docs/blog/index.rst b/docs/blog/index.rst index 8651cef69..a396ecaf6 100644 --- a/docs/blog/index.rst +++ b/docs/blog/index.rst @@ -13,3 +13,4 @@ Blog private-engines command-line-engines search-indexer-engines + sql-engines diff --git a/docs/blog/sql-engines.rst b/docs/blog/sql-engines.rst new file mode 100644 index 000000000..413316054 --- /dev/null +++ b/docs/blog/sql-engines.rst @@ -0,0 +1,160 @@ +=========== +SQL engines +=========== + +.. sidebar:: further read + + - `SQLite <https://www.sqlite.org/index.html>`_ + - `PostgreSQL <https://www.postgresql.org>`_ + - `MySQL <https://www.mysql.com>`_ + +With the *SQL engines* you can bind SQL databases into SearxNG. The following +Relational Database Management Systems (RDBMS) are supported: + +- :ref:`engine sqlite` +- :ref:`engine postgresql` +- :ref:`engine mysql_server` + +All of the engines above are just commented out in the :origin:`settings.yml +<searx/settings.yml>`, as you have to set the required attributes for the +engines, e.g. ``database:`` ... + +.. 
code:: yaml + + - name: ... + engine: {sqlite|postgresql|mysql_server} + database: ... + result_template: {template_name} + query_str: ... + +By default, the engines use the ``key-value`` template for displaying results / +see :origin:`oscar <searx/templates/oscar/result_templates/key-value.html>` & +:origin:`simple <searx/templates/simple/result_templates/key-value.html>` +themes. If you are not satisfied with the original result layout, you can use +your own template, set ``result_template`` attribute to ``{template_name}`` and +place the templates at:: + + searx/templates/{theme_name}/result_templates/{template_name} + +As mentioned in previous blog posts, if you do not wish to expose these engines +on a public instance, you can still add them and limit the access by setting +``tokens`` as described in section :ref:`private engines`. + +Configure the engines +===================== + +The configuration of the new database engines is similar. You must put a valid +SQL-SELECT query in ``query_str``. At the moment you can only bind at most one +parameter in your query. By setting the attribute ``limit`` you can define how +many results you want from the SQL server. Basically, it is the same as the +``LIMIT`` keyword in SQL. + +Please, do not include ``LIMIT`` or ``OFFSET`` in your SQL query as the engines +rely on these keywords during paging. If you want to configure the number of +returned results use the option ``limit``. + +.. _engine sqlite: + +SQLite +------ + +.. _MediathekView: https://mediathekview.de/ + +SQLite is a small, fast and reliable SQL database engine. It does not require +any extra dependency. To demonstrate the power of database engines, here is a +more complex example which reads from a MediathekView_ (DE) movie database. For +this example of the SQLite engine download the database: + +- https://liste.mediathekview.de/filmliste-v2.db.bz2 + +and unpack into ``searx/data/filmliste-v2.db``. To search the database use, +e.g., the query ``!mediathekview concert`` + +.. 
code:: yaml + + - name: mediathekview + engine: sqlite + disabled: False + categories: general + result_template: default.html + database: searx/data/filmliste-v2.db + query_str: >- + SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + description AS content + FROM film + WHERE title LIKE :wildcard OR description LIKE :wildcard + ORDER BY duration DESC + + +Extra Dependencies +------------------ + +For using :ref:`engine postgresql` or :ref:`engine mysql_server` you need to +install additional packages in Python's Virtual Environment of your SearxNG +instance. To switch into the environment (:ref:`searx-src`) you can use +:ref:`searx.sh`:: + + $ sudo utils/searx.sh shell + (searx-pyenv)$ pip install ... + + +.. _engine postgresql: + +PostgreSQL +---------- + +.. _psycopg2: https://www.psycopg.org/install + +.. sidebar:: requirements + + ``pip install`` psycopg2_ + +PostgreSQL is a powerful and robust open source database. Before configuring +the PostgreSQL engine, you must install the dependency ``psycopg2``. You can +find an example configuration below: + +.. code:: yaml + + - name: my_database + engine: postgresql + database: my_database + username: searx + password: password + query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + +.. _engine mysql_server: + +MySQL +----- + +.. _mysql-connector-python: https://pypi.org/project/mysql-connector-python + +.. sidebar:: requirements + + ``pip install`` mysql-connector-python_ + +MySQL is said to be the most popular open source database. Before enabling the MySQL +engine, you must install the package ``mysql-connector-python``. + +The authentication plugin is configurable by setting ``auth_plugin`` in the +attributes. By default it is set to ``caching_sha2_password``. This is an +example configuration for querying a MySQL server: + +.. 
code:: yaml + + - name: my_database + engine: mysql_server + database: my_database + username: searx + password: password + limit: 5 + query_str: 'SELECT * from my_table WHERE my_column=%(query)s' + + +Acknowledgement +=============== + +This development was sponsored by `Search and Discovery Fund +<https://nlnet.nl/discovery>`_ of `NLnet Foundation <https://nlnet.nl/>`_ . +