first commit

This commit is contained in:
git
2025-07-20 13:25:51 +10:00
commit a2971879f0
294 changed files with 42788 additions and 0 deletions
+234
View File
@@ -0,0 +1,234 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring, missing-class-docstring
import sys
from hashlib import sha256
from importlib import import_module
from os import listdir, makedirs, remove, stat, utime
from os.path import abspath, basename, dirname, exists, join
from shutil import copyfile
from pkgutil import iter_modules
from logging import getLogger
from typing import List, Tuple
from searx import logger, settings
class Plugin:  # pylint: disable=too-few-public-methods
    """This class is currently never initialized and only used for type hinting."""

    id: str  # set by load_plugin(): the plugin's module name
    name: str  # required attribute (see required_attrs)
    description: str  # required attribute (see required_attrs)
    default_on: bool  # required; may be overridden by settings['enabled_plugins']
    js_dependencies: Tuple[str, ...]  # rewritten to web paths for external plugins
    css_dependencies: Tuple[str, ...]  # rewritten to web paths for external plugins
    preference_section: str  # preferences UI section ("general", "query", ...)
logger = logger.getChild("plugins")

# (attribute name, expected type) pairs every plugin module must define;
# load_plugin() aborts the process (exit code 3) when one is missing or
# has the wrong type.
required_attrs = (
    # fmt: off
    ("name", str),
    ("description", str),
    ("default_on", bool)
    # fmt: on
)

# Optional attributes: when missing or mistyped, load_plugin() replaces the
# value with the type's default (empty tuple / empty string).
optional_attrs = (
    # fmt: off
    ("js_dependencies", tuple),
    ("css_dependencies", tuple),
    ("preference_section", str),
    # fmt: on
)
def sha_sum(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    The file is read in fixed-size chunks so that large plugin resources do
    not have to be loaded into memory at once (the old implementation read
    the whole file in a single ``read()``).
    """
    digest = sha256()
    with open(filename, "rb") as f:
        # iter() with a sentinel yields chunks until read() returns b""
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def sync_resource(base_path, resource_path, name, target_dir, plugin_dir):
    """Copy one plugin resource into *target_dir* and return its web path.

    The copy is skipped when an identical file (same SHA-256) already exists
    at the target.  Aborts the process (exit code 3) when the copy fails.
    """
    source_path = join(base_path, resource_path)
    file_name = basename(source_path)
    target_path = join(target_dir, file_name)
    needs_copy = not exists(target_path) or sha_sum(source_path) != sha_sum(target_path)
    if needs_copy:
        try:
            copyfile(source_path, target_path)
            # copy atime_ns and mtime_ns, so the weak ETags (generated by
            # the HTTP server) do not change
            source_stat = stat(source_path)
            utime(target_path, ns=(source_stat.st_atime_ns, source_stat.st_mtime_ns))
        except IOError:
            logger.critical("failed to copy plugin resource {0} for plugin {1}".format(file_name, name))
            sys.exit(3)
    # returning with the web path of the resource
    return join("plugins/external_plugins", plugin_dir, file_name)
def prepare_package_resources(plugin, plugin_module_name):
    """Copy the JS/CSS dependencies of an external *plugin* into the static
    directory and rewrite the dependency lists to their web paths.

    Files in the target directory that are no longer referenced by the plugin
    are deleted.  Aborts the process (exit code 3) on any I/O failure.
    """
    plugin_base_path = dirname(abspath(plugin.__file__))
    plugin_dir = plugin_module_name
    target_dir = join(settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir)
    try:
        makedirs(target_dir, exist_ok=True)
    except IOError:
        logger.critical("failed to create resource directory {0} for plugin {1}".format(target_dir, plugin_module_name))
        sys.exit(3)

    # basenames of every resource that belongs to the current plugin version
    resources = []

    if hasattr(plugin, "js_dependencies"):
        resources.extend(map(basename, plugin.js_dependencies))
        # replace the module-relative paths with the web paths of the copies
        plugin.js_dependencies = [
            sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir)
            for x in plugin.js_dependencies
        ]
    if hasattr(plugin, "css_dependencies"):
        resources.extend(map(basename, plugin.css_dependencies))
        plugin.css_dependencies = [
            sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir)
            for x in plugin.css_dependencies
        ]

    # delete stale files left over from a previous version of the plugin
    for f in listdir(target_dir):
        if basename(f) not in resources:
            resource_path = join(target_dir, basename(f))
            try:
                remove(resource_path)
            except IOError:
                logger.critical(
                    "failed to remove unused resource file {0} for plugin {1}".format(resource_path, plugin_module_name)
                )
                sys.exit(3)
def load_plugin(plugin_module_name, external):
    """Import the plugin module and validate/normalize its attributes.

    Returns the module object on success, or ``None`` when the plugin raised
    a non-fatal exception while being imported.  Exits the process (code 3)
    on fatal errors such as a missing required attribute.
    """
    # pylint: disable=too-many-branches
    try:
        plugin = import_module(plugin_module_name)
    except (
        SyntaxError,
        KeyboardInterrupt,
        SystemExit,
        SystemError,
        ImportError,
        RuntimeError,
    ) as e:
        logger.critical("%s: fatal exception", plugin_module_name, exc_info=e)
        sys.exit(3)
    except BaseException:
        logger.exception("%s: exception while loading, the plugin is disabled", plugin_module_name)
        return None

    # difference with searx: use module name instead of the user name
    plugin.id = plugin_module_name

    # per-plugin logger, named after the plugin's module
    plugin.logger = getLogger(plugin_module_name)

    for plugin_attr, plugin_attr_type in required_attrs:
        if not hasattr(plugin, plugin_attr):
            logger.critical('%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr)
            sys.exit(3)
        attr = getattr(plugin, plugin_attr)
        if not isinstance(attr, plugin_attr_type):
            # fixed: the old message interpolated plugin and attribute name in
            # swapped ({1}…{0}) order; use lazy %-formatting like the call above
            logger.critical(
                '%s: attribute "%s" is of type %s, must be of type %s, cannot load plugin',
                plugin,
                plugin_attr,
                str(type(attr)),
                plugin_attr_type,
            )
            sys.exit(3)

    for plugin_attr, plugin_attr_type in optional_attrs:
        if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type):
            setattr(plugin, plugin_attr, plugin_attr_type())

    # fixed: the optional_attrs loop above always sets preference_section, so
    # the old ``hasattr`` check was dead code and the "general" default was
    # never applied — fall back when the section is missing OR empty instead
    if not getattr(plugin, "preference_section", None):
        plugin.preference_section = "general"

    # query plugin
    if plugin.preference_section == "query":
        for plugin_attr in ("query_keywords", "query_examples"):
            if not hasattr(plugin, plugin_attr):
                logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin))
                sys.exit(3)

    if settings.get("enabled_plugins"):
        # searx compatibility: plugin.name in settings['enabled_plugins']
        plugin.default_on = plugin.name in settings["enabled_plugins"] or plugin.id in settings["enabled_plugins"]

    # copy resources if this is an external plugin
    if external:
        prepare_package_resources(plugin, plugin_module_name)

    logger.debug("%s: loaded", plugin_module_name)
    return plugin
def load_and_initialize_plugin(plugin_module_name, external, init_args):
    """Load a plugin and, when it defines ``init``, call it with *init_args*.

    Returns the plugin module, or ``None`` when loading failed, ``init``
    returned a falsy value, or ``init`` raised.
    """
    plugin = load_plugin(plugin_module_name, external)
    if not plugin or not hasattr(plugin, 'init'):
        return plugin
    try:
        if plugin.init(*init_args):
            return plugin
        return None
    except Exception:  # pylint: disable=broad-except
        plugin.logger.exception("Exception while calling init, the plugin is disabled")
        return None
class PluginStore:
    """Registry of the loaded plugin modules."""

    def __init__(self):
        # list of loaded plugin modules (see the ``Plugin`` type-hint class)
        self.plugins: 'List[Plugin]' = []

    def __iter__(self):
        return iter(self.plugins)

    def register(self, plugin):
        """Add *plugin* to the store."""
        self.plugins.append(plugin)

    def call(self, ordered_plugin_list, plugin_type, *args, **kwargs):
        """Invoke hook *plugin_type* on each plugin, in order.

        Stops early when a hook returns a falsy value; a hook that raises is
        logged and skipped.  Returns the last hook's return value (True when
        no plugin implements the hook).
        """
        ret = True
        for plugin in ordered_plugin_list:
            if not hasattr(plugin, plugin_type):
                continue
            try:
                ret = getattr(plugin, plugin_type)(*args, **kwargs)
                if not ret:
                    break
            except Exception:  # pylint: disable=broad-except
                plugin.logger.exception("Exception while calling %s", plugin_type)
        return ret


# module-wide singleton, filled by initialize()
plugins = PluginStore()
def plugin_module_names():
    """Yield ``(module_name, is_external)`` for embedded, then external plugins."""
    seen = set()
    # embedded plugins: every module in this package
    for module in iter_modules(path=[dirname(__file__)]):
        seen.add(module.name)
        yield __name__ + "." + module.name, False
    # external plugins from settings.yml, skipping already-seen names
    for module_name in settings['plugins']:
        if module_name in seen:
            continue
        seen.add(module_name)
        yield module_name, True
def initialize(app):
    """Load, initialize and register every embedded and external plugin."""
    for module_name, external in plugin_module_names():
        loaded = load_and_initialize_plugin(module_name, external, (app, settings))
        if loaded is not None:
            plugins.register(loaded)
+29
View File
@@ -0,0 +1,29 @@
'''
SPDX-License-Identifier: AGPL-3.0-or-later
'''
from hashlib import md5
from searx.data import ahmia_blacklist_loader
name = "Ahmia blacklist"
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
default_on = True
preference_section = 'onions'
ahmia_blacklist = None
def on_result(request, search, result):
    """Keep the result unless its onion hostname hash is on the blacklist."""
    if not (result.get('is_onion') and result.get('parsed_url')):
        # non-onion results (or results without a parsed URL) pass through
        return True
    hostname_hash = md5(result['parsed_url'].hostname.encode()).hexdigest()
    return hostname_hash not in ahmia_blacklist
def init(app, settings):
    """Plugin setup hook: load the blacklist.

    Returns False (plugin disabled) when outgoing requests are not routed
    through a Tor proxy; True once the blacklist has been loaded.
    """
    global ahmia_blacklist  # pylint: disable=global-statement
    if not settings['outgoing']['using_tor_proxy']:
        # disable the plugin
        return False
    ahmia_blacklist = ahmia_blacklist_loader()
    return True
+57
View File
@@ -0,0 +1,57 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
(C) 2018, 2020 by Vaclav Zouzalik
'''
from flask_babel import gettext
import hashlib
import re
name = "Hash plugin"
description = gettext("Converts strings to different hash digests.")
default_on = True
preference_section = 'query'
query_keywords = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']
query_examples = 'sha512 The quick brown fox jumps over the lazy dog'
parser_re = re.compile('(md5|sha1|sha224|sha256|sha384|sha512) (.*)', re.I)
def post_search(request, search):
    """Answer ``<algorithm> <text>`` queries with the hex digest of *text*.

    Only runs on the first result page.  Always returns True so the search
    continues; the digest is placed in the result container's answers.
    """
    # process only on first page
    if search.search_query.pageno > 1:
        return True
    m = parser_re.match(search.search_query.query)
    if not m:
        # wrong query
        return True

    function, string = m.groups()
    # fixed idiom: ``string.strip().__len__() == 0`` → truthiness test
    if not string.strip():
        # end if the string is empty
        return True

    # select hash function
    f = hashlib.new(function.lower())

    # make digest from the given string (strip ASCII whitespace bytes)
    f.update(string.encode('utf-8').strip())
    answer = function + " " + gettext('hash digest') + ": " + f.hexdigest()

    # print result
    search.result_container.answers.clear()
    search.result_container.answers['hash'] = {'answer': answer}
    return True
+46
View File
@@ -0,0 +1,46 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
import re
from urllib.parse import urlunparse, urlparse
from searx import settings
from searx.plugins import logger
from flask_babel import gettext
name = gettext('Hostname replace')
description = gettext('Rewrite result hostnames or remove results based on the hostname')
default_on = False
preference_section = 'general'

plugin_id = 'hostname_replace'

# compiled hostname pattern -> replacement string, taken from the
# ``hostname_replace:`` block in settings.yml; an empty replacement means
# "remove results matching this pattern" (see on_result)
replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {}

logger = logger.getChild(plugin_id)

# key of the parsed URL in a result dict
parsed = 'parsed_url'
# additional result URL fields that are also rewritten/removed
_url_fields = ['iframe_src', 'audio_src']
def on_result(request, search, result):
    """Rewrite (or drop) a result whose hostname matches a configured pattern.

    Returns False to remove the result from the result list (pattern with an
    empty replacement), True to keep it — possibly with rewritten URLs.
    """
    for (pattern, replacement) in replacements.items():
        if parsed in result:
            if pattern.search(result[parsed].netloc):
                # to keep or remove this result from the result list depends
                # (only) on the 'parsed_url'
                if not replacement:
                    return False
                result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
                result['url'] = urlunparse(result[parsed])

        # additionally rewrite or drop embedded media URLs (iframe/audio)
        for url_field in _url_fields:
            if result.get(url_field):
                url_src = urlparse(result[url_field])
                if pattern.search(url_src.netloc):
                    if not replacement:
                        del result[url_field]
                    else:
                        url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
                        result[url_field] = urlunparse(url_src)
    return True
+32
View File
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pyright: basic
"""see :ref:`limiter src`"""
import flask
from searx import redisdb
from searx.plugins import logger
from searx.botdetection import limiter
name = "Request limiter"
description = "Limit the number of request"
default_on = False
preference_section = 'service'
logger = logger.getChild('limiter')
def pre_request():
    """See :ref:`flask.Flask.before_request`

    Runs before every request; per Flask's ``before_request`` contract, a
    non-None return value from ``limiter.filter_request`` is used as the
    response and normal request handling is skipped.
    """
    return limiter.filter_request(flask.request)
def init(app: flask.Flask, settings) -> bool:
    """Hook the limiter into *app* when enabled and Redis is reachable.

    Returns True when the ``pre_request`` hook was registered, False when the
    limiter is disabled in settings or no Redis client is available.
    """
    if not settings['server']['limiter']:
        return False
    if redisdb.client():
        app.before_request(pre_request)
        return True
    logger.error("The limiter requires Redis")
    return False
+47
View File
@@ -0,0 +1,47 @@
from urllib.parse import urlparse, parse_qsl
from flask_babel import gettext
import re
from searx import settings
# DOI pattern: directory indicator "10.", a 4-9 digit registrant code, "/"
# and a suffix of non-whitespace characters
regex = re.compile(r'10\.\d{4,9}/[^\s]+')

name = gettext('Open Access DOI rewrite')
description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available')
default_on = False
preference_section = 'general'
def extract_doi(url):
    """Return the first DOI found in *url*'s path or query values, else None.

    *url* is a ``urllib.parse`` parse result; the path is checked first, then
    each query parameter value in order.
    """
    candidates = [url.path] + [value for _, value in parse_qsl(url.query)]
    for text in candidates:
        found = regex.search(text)
        if found:
            return found.group(0)
    return None
def get_doi_resolver(preferences):
    """Return the URL of the user's DOI resolver, falling back to the default."""
    doi_resolvers = settings['doi_resolvers']
    selected = preferences.get_value('doi_resolver')[0]
    if selected not in doi_resolvers:
        # unknown resolver name in the preferences: use the configured default
        selected = settings['default_doi_resolver']
    return doi_resolvers[selected]
def on_result(request, search, result):
    """Rewrite the result URL to the user's DOI resolver when a DOI is found.

    The DOI is also attached to the result under the 'doi' key when not
    already present.  Always returns True (the result is kept).
    """
    if 'parsed_url' not in result:
        return True

    doi = extract_doi(result['parsed_url'])
    # 50 chars looks like a sanity limit against the open-ended regex
    # over-matching — TODO confirm against real-world DOI lengths
    if doi and len(doi) < 50:
        # strip publisher-specific suffixes so only the bare DOI remains;
        # each suffix in this fixed order is removed at most once
        for suffix in ('/', '.pdf', '.xml', '/full', '/meta', '/abstract'):
            if doi.endswith(suffix):
                doi = doi[: -len(suffix)]
        result['url'] = get_doi_resolver(request.preferences) + doi
        result['parsed_url'] = urlparse(result['url'])
        if 'doi' not in result:
            result['doi'] = doi
    return True
+24
View File
@@ -0,0 +1,24 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
'''
from flask_babel import gettext
# No hook functions are defined in this module — the plugin only contributes
# the metadata below; the behavior itself presumably lives in the theme's
# JavaScript (verify against the static assets).
name = gettext('Search on category select')
description = gettext(
    'Perform search immediately if a category selected. Disable to select multiple categories. (JavaScript required)'
)
default_on = True
preference_section = 'ui'
+30
View File
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring,invalid-name
import re
from flask_babel import gettext
from searx.botdetection._helpers import get_real_ip
name = gettext('Self Information')
description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
default_on = True
preference_section = 'query'
query_keywords = ['user-agent']
query_examples = ''

# Self User Agent regex
# matches any query containing "user agent" or "user-agent" (case-insensitive)
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
def post_search(request, search):
    """Answer 'ip' queries with the client IP, user-agent queries with the UA.

    Only runs on the first result page; always returns True so the search
    continues.
    """
    if search.search_query.pageno > 1:
        return True
    query = search.search_query.query
    if query == 'ip':
        search.result_container.answers['ip'] = {'answer': get_real_ip(request)}
    elif p.match(query):
        search.result_container.answers['user-agent'] = {'answer': request.user_agent}
    return True
+92
View File
@@ -0,0 +1,92 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""A plugin to check if the ip address of the request is a Tor exit-node if the
user searches for ``tor-check``. It fetches the tor exit node list from
https://check.torproject.org/exit-addresses and parses all the IPs into a list,
then checks if the user's IP address is in it.
Enable in ``settings.yml``:
.. code:: yaml
enabled_plugins:
..
- 'Tor check plugin'
"""
import re
from flask_babel import gettext
from httpx import HTTPError
from searx.network import get
default_on = False
name = gettext("Tor check plugin")
'''Translated name of the plugin'''
description = gettext(
"This plugin checks if the address of the request is a Tor exit-node, and"
" informs the user if it is; like check.torproject.org, but from SearXNG."
)
'''Translated description of the plugin.'''
preference_section = 'query'
'''The preference section where the plugin is shown.'''
query_keywords = ['tor-check']
'''Query keywords shown in the preferences.'''
query_examples = ''
'''Query examples shown in the preferences.'''
# Regex for exit node addresses in the list.
reg = re.compile(r"(?<=ExitAddress )\S+")
def post_search(request, search):
    """Answer a literal ``tor-check`` query with whether the request appears
    to come from a Tor exit node (first result page only).

    Downloads the exit-node list on every matching query; on download failure
    an error answer is shown instead.  Always returns True.
    """
    if search.search_query.pageno > 1:
        return True

    if search.search_query.query.lower() == "tor-check":

        # Request the list of tor exit nodes.
        try:
            resp = get("https://check.torproject.org/exit-addresses")
            node_list = re.findall(reg, resp.text)

        except HTTPError:
            # No answer, return error
            search.result_container.answers["tor"] = {
                "answer": gettext(
                    "Could not download the list of Tor exit-nodes from: https://check.torproject.org/exit-addresses"
                )
            }
            return True

        # NOTE(review): trusts the first X-Forwarded-For entry — only safe
        # behind a proxy that sanitizes the header; verify deployment setup
        x_forwarded_for = request.headers.getlist("X-Forwarded-For")

        if x_forwarded_for:
            ip_address = x_forwarded_for[0]
        else:
            ip_address = request.remote_addr

        if ip_address in node_list:
            search.result_container.answers["tor"] = {
                "answer": gettext(
                    "You are using Tor and it looks like you have this external IP address: {ip_address}".format(
                        ip_address=ip_address
                    )
                )
            }
        else:
            search.result_container.answers["tor"] = {
                "answer": gettext(
                    "You are not using Tor and you have this external IP address: {ip_address}".format(
                        ip_address=ip_address
                    )
                )
            }
    return True
+55
View File
@@ -0,0 +1,55 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
'''
from flask_babel import gettext
import re
from urllib.parse import urlunparse, parse_qsl, urlencode
# Patterns matched against the *name* of each URL query parameter (prefix
# match via ``reg.match``); parameters matching any pattern are stripped
# from result URLs by on_result().
regexes = {
    re.compile(r'utm_[^&]+'),
    re.compile(r'(wkey|wemail)[^&]*'),
    re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'),
    re.compile(r'&$'),
}

name = gettext('Tracker URL remover')
description = gettext('Remove trackers arguments from the returned URL')
default_on = True
preference_section = 'privacy'
def on_result(request, search, result):
    """Strip known tracking parameters (utm_*, HubSpot, wkey/wemail, ...)
    from the result's URL.

    Always returns True — the result itself is kept; its 'parsed_url' and
    'url' are rewritten only when at least one parameter was removed.
    """
    if 'parsed_url' not in result:
        return True

    query = result['parsed_url'].query
    if query == "":
        return True

    parsed_query = parse_qsl(query)
    # keep only parameters whose name matches none of the tracker patterns
    # (replaces the old pop-while-iterating loop with manual index-offset
    # bookkeeping, which also re-serialized the URL once per removed param)
    kept_params = [
        (param_name, value)
        for (param_name, value) in parsed_query
        if not any(reg.match(param_name) for reg in regexes)
    ]
    if len(kept_params) != len(parsed_query):
        result['parsed_url'] = result['parsed_url']._replace(query=urlencode(kept_params))
        result['url'] = urlunparse(result['parsed_url'])
    return True
+10
View File
@@ -0,0 +1,10 @@
from flask_babel import gettext
# No hook functions are defined in this module — the plugin only contributes
# the metadata below; the hotkey handling presumably lives in the theme's
# JavaScript (verify against the static assets).
name = gettext('Vim-like hotkeys')
description = gettext(
    'Navigate search results with Vim-like hotkeys '
    '(JavaScript required). '
    'Press "h" key on main or result page to get help.'
)
default_on = False
preference_section = 'ui'