diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 000000000..d7ec87921 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,133 @@ +;;; .dir-locals.el +;; +;; If you get ``*** EPC Error ***`` (even after a jedi:install-server) in your +;; emacs session, mostly you have jedi-mode enabled but the python enviroment is +;; missed. The python environment has to be next to the +;; ``/.dir-locals.el`` in:: +;; +;; ./local/py3 +;; +;; In Emacs, some buffer locals are referencing the project environment: +;; +;; - prj-root --> / +;; - python-environment-directory --> /local +;; - python-environment-default-root-name --> py3 +;; - python-shell-virtualenv-root --> /local/py3 +;; When this variable is set with the path of the virtualenv to use, +;; `process-environment' and `exec-path' get proper values in order to run +;; shells inside the specified virtualenv, example:: +;; (setq python-shell-virtualenv-root "/path/to/env/") +;; +;; To setup such an environment build target 'pyenv' or 'pyenvinstall':: +;; +;; $ make pyenvinstall +;; +;; Alternatively create the virtualenv, source it and install jedi + epc +;; (required by `emacs-jedi `_):: +;; +;; $ virtualenv --python=python3 "--no-site-packages" ./local/py3 +;; ... +;; $ source ./local/py3/bin/activate +;; (py3)$ # now install into the activated 'py3' environment .. +;; (py3)$ pip install jedi epc +;; ... +;; +;; Here is what also I found useful to add to my .emacs:: +;; +;; (global-set-key [f6] 'flycheck-mode) +;; (add-hook 'python-mode-hook 'my:python-mode-hook) +;; +;; (defun my:python-mode-hook () +;; (add-to-list 'company-backends 'company-jedi) +;; (require 'jedi-core) +;; (jedi:setup) +;; (define-key python-mode-map (kbd "C-c C-d") 'jedi:show-doc) +;; (define-key python-mode-map (kbd "M-.") 'jedi:goto-definition) +;; (define-key python-mode-map (kbd "M-,") 'jedi:goto-definition-pop-marker) +;; ) +;; + +((nil + . ((fill-column . 80) + )) + (python-mode + . ((indent-tabs-mode . nil) + + ;; project root folder is where the `.dir-locals.el' is located + (eval . (setq-local + prj-root (locate-dominating-file default-directory ".dir-locals.el"))) + + (eval . (setq-local + python-environment-directory (expand-file-name "./local" prj-root))) + + ;; use 'py3' enviroment as default + (eval . (setq-local + python-environment-default-root-name "py3")) + + (eval . (setq-local + python-shell-virtualenv-root + (concat python-environment-directory + "/" + python-environment-default-root-name))) + + ;; python-shell-virtualenv-path is obsolete, use python-shell-virtualenv-root! + ;; (eval . (setq-local + ;; python-shell-virtualenv-path python-shell-virtualenv-root)) + + (eval . (setq-local + python-shell-interpreter + (expand-file-name "bin/python" python-shell-virtualenv-root))) + + (eval . (setq-local + python-environment-virtualenv + (list (expand-file-name "bin/virtualenv" python-shell-virtualenv-root) + ;;"--system-site-packages" + "--quiet"))) + + (eval . (setq-local + pylint-command + (expand-file-name "bin/pylint" python-shell-virtualenv-root))) + + ;; pylint will find the '.pylintrc' file next to the CWD + ;; https://pylint.readthedocs.io/en/latest/user_guide/run.html#command-line-options + (eval . (setq-local + flycheck-pylintrc ".pylintrc")) + + ;; flycheck & other python stuff should use the local py3 environment + (eval . (setq-local + flycheck-python-pylint-executable python-shell-interpreter)) + + ;; use 'M-x jedi:show-setup-info' and 'M-x epc:controller' to inspect jedi server + + ;; https://tkf.github.io/emacs-jedi/latest/#jedi:environment-root -- You + ;; can specify a full path instead of a name (relative path). In that case, + ;; python-environment-directory is ignored and Python virtual environment + ;; is created at the specified path. + (eval . (setq-local jedi:environment-root python-shell-virtualenv-root)) + + ;; https://tkf.github.io/emacs-jedi/latest/#jedi:server-command + (eval .(setq-local + jedi:server-command + (list python-shell-interpreter + jedi:server-script) + )) + + ;; jedi:environment-virtualenv --> see above 'python-environment-virtualenv' + ;; is set buffer local! No need to setup jedi:environment-virtualenv: + ;; + ;; Virtualenv command to use. A list of string. If it is nil, + ;; python-environment-virtualenv is used instead. You must set non-nil + ;; value to jedi:environment-root in order to make this setting work. + ;; + ;; https://tkf.github.io/emacs-jedi/latest/#jedi:environment-virtualenv + ;; + ;; (eval . (setq-local + ;; jedi:environment-virtualenv + ;; (list (expand-file-name "bin/virtualenv" python-shell-virtualenv-root) + ;; ;;"--python" + ;; ;;"/usr/bin/python3.4" + ;; ))) + + ;; jedi:server-args + + ))) diff --git a/.gitignore b/.gitignore index db20da83e..828856f4c 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ setup.cfg node_modules/ .tx/ + +local/ +searx.egg-info/ diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..3b4adb2ca --- /dev/null +++ b/.pylintrc @@ -0,0 +1,444 @@ +# -*- coding: utf-8; mode: conf -*- +# lint Python modules using external checkers. +# +# This is the main checker controlling the other ones and the reports +# generation. It is itself both a raw checker and an astng checker in order +# to: +# * handle message activation / deactivation at the module level +# * handle some basic but necessary stats'data (number of classes, methods...) +# +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS, .git, .svn + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=bad-whitespace, duplicate-code + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details + +# HINT: do not set this here, use argument --msg-template=... +#msg-template={path}:{line}: [{msg_id}({symbol}),{obj}] {msg} + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio).You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. + +# HINT: do not set this here, use argument --output-format=... +#output-format=text + +# Tells whether to display a full report or only the messages +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[BASIC] + +# List of builtins function names that should not be used, separated by a comma +bad-functions=map,filter,apply,input + +# Naming hint for argument names +argument-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Regular expression matching correct argument names +argument-rgx=(([a-z][a-zA-Z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Naming hint for attribute names +attr-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Regular expression matching correct attribute names +attr-rgx=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*)|([A-Z0-9_]*))$ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Naming hint for class attribute names +class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Naming hint for class names +class-name-hint=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Naming hint for constant names +const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct constant names +const-rgx=(([a-zA-Z_][a-zA-Z0-9_]*)|(__.*__))$ +#const-rgx=[f]?[A-Z_][a-zA-Z0-9_]{2,30}$ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming hint for function names +function-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Regular expression matching correct function names +function-rgx=(([a-z][a-zA-Z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_,log,cfg,id + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Naming hint for inline iteration names +inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Naming hint for method names +method-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Regular expression matching correct method names +method-rgx=(([a-z][a-zA-Z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Naming hint for module names +module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct module names +#module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ +module-rgx=([a-z_][a-z0-9_]*)$ + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty + +# Naming hint for variable names +variable-name-hint=(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$ + +# Regular expression matching correct variable names +variable-rgx=(([a-z][a-zA-Z0-9_]{2,30})|(_[a-z0-9_]*)|([a-z]))$ + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=120 + +# Maximum number of lines in a module +max-module-lines=2000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma,dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement.No config file found, using default configuration + +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=8 + +# Maximum number of attributes for a class (see R0902). +max-attributes=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of locals for function / method body +max-locals=20 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of statements in function / method body +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception diff --git a/AUTHORS.rst b/AUTHORS.rst index 674bfd758..2a2f19219 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -1,4 +1,4 @@ -Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament and Noémi Ványi. +Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament, Noémi Ványi, @pofilo and Markus Heiser. Major contributing authors: @@ -9,6 +9,8 @@ Major contributing authors: - @Cqoicebordel - Noémi Ványi - Marc Abonce Seguin @a01200356 +- @pofilo +- Markus Heiser @return42 People who have submitted patches/translates, reported bugs, consulted features or generally made searx better: diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..77ffe489f --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +# -*- coding: utf-8; mode: makefile-gmake -*- + +PYOBJECTS = searx +PY_SETUP_EXTRAS ?= \[test\] + +include utils/makefile.include +include utils/makefile.python + +all: clean install + +PHONY += help +help: + @echo ' test - run developer tests' + @echo ' run - run developer instance' + @echo ' install - developer install (./local)' + @echo ' uninstall - uninstall (./local)' + @echo '' + @$(MAKE) -s -f utils/makefile.include make-help + @echo '' + @$(MAKE) -s -f utils/makefile.python python-help + +PHONY += install +install: pyenvinstall + +PHONY += uninstall +uninstall: pyenvuninstall + +PHONY += clean +clean: pyclean + $(call cmd,common_clean) + +PHONY += run +run: pyenvinstall + $(Q) ( \ + sed -i -e "s/debug : False/debug : True/g" ./searx/settings.yml ; \ + sleep 2 ; \ + xdg-open http://127.0.0.1:8888/ ; \ + sleep 3 ; \ + sed -i -e "s/debug : True/debug : False/g" ./searx/settings.yml ; \ + ) & + $(PY_ENV)/bin/python ./searx/webapp.py + +# test +# ---- + +PHONY += test test.pylint test.pep8 test.unit test.robot + +# TODO: balance linting with pylint +test: test.pep8 test.unit test.robot + - make pylint + +test.pep8: pyenvinstall + $(PY_ENV_ACT); ./manage.sh pep8_check + +test.unit: pyenvinstall + $(PY_ENV_ACT); ./manage.sh unit_tests + +test.robot: pyenvinstall + $(PY_ENV_ACT); ./manage.sh install_geckodriver + $(PY_ENV_ACT); ./manage.sh robot_tests + +.PHONY: $(PHONY) diff --git a/searx/engines/google.py b/searx/engines/google.py index 19bde710d..eed3a044e 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -107,13 +107,12 @@ images_path = '/images' supported_languages_url = 'https://www.google.com/preferences?#languages' # specific xpath variables -results_xpath = '//div[@class="g"]' -url_xpath = './/h3/a/@href' -title_xpath = './/h3' -content_xpath = './/span[@class="st"]' -content_misc_xpath = './/div[@class="f slp"]' -suggestion_xpath = '//p[@class="_Bmc"]' -spelling_suggestion_xpath = '//a[@class="spell"]' +results_xpath = '//div[contains(@class, "ZINbbc")]' +url_xpath = './/div[@class="kCrYT"][1]/a/@href' +title_xpath = './/div[@class="kCrYT"][1]/a/div[1]' +content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]' +suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]' +spelling_suggestion_xpath = '//div[@id="scc"]//a' # map : detail location map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()' @@ -199,10 +198,6 @@ def request(query, params): params['headers']['Accept-Language'] = language + ',' + language + '-' + country params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' - # Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse - params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)" - "AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1") - params['google_hostname'] = google_hostname return params @@ -274,9 +269,7 @@ def response(resp): content = extract_text_from_dom(result, content_xpath) if content is None: continue - content_misc = extract_text_from_dom(result, content_misc_xpath) - if content_misc is not None: - content = content_misc + "
" + content + # append result results.append({'url': url, 'title': title, diff --git a/searx/templates/oscar/advanced.html b/searx/templates/oscar/advanced.html index 95d99ba6a..bf5f86324 100644 --- a/searx/templates/oscar/advanced.html +++ b/searx/templates/oscar/advanced.html @@ -1,16 +1,17 @@ -