first commit

This commit is contained in:
git
2025-07-20 13:25:51 +10:00
commit a2971879f0
294 changed files with 42788 additions and 0 deletions
+234
View File
@@ -0,0 +1,234 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring, missing-class-docstring
import sys
from hashlib import sha256
from importlib import import_module
from os import listdir, makedirs, remove, stat, utime
from os.path import abspath, basename, dirname, exists, join
from shutil import copyfile
from pkgutil import iter_modules
from logging import getLogger
from typing import List, Tuple
from searx import logger, settings
class Plugin:  # pylint: disable=too-few-public-methods
    """This class is currently never initialized and only used for type hinting."""

    id: str  # set by load_plugin(): the plugin's module name
    name: str  # required attribute (see required_attrs)
    description: str  # required attribute (see required_attrs)
    default_on: bool  # required; may be overridden by settings['enabled_plugins']
    js_dependencies: Tuple[str, ...]  # rewritten to web paths for external plugins
    css_dependencies: Tuple[str, ...]  # rewritten to web paths for external plugins
    preference_section: str  # preferences UI section ("general", "query", ...)
logger = logger.getChild("plugins")

# (attribute name, expected type) pairs every plugin module must define;
# load_plugin() aborts the process (exit code 3) when one is missing or
# has the wrong type.
required_attrs = (
    # fmt: off
    ("name", str),
    ("description", str),
    ("default_on", bool)
    # fmt: on
)

# Optional attributes: when missing or mistyped, load_plugin() replaces the
# value with the type's default (empty tuple / empty string).
optional_attrs = (
    # fmt: off
    ("js_dependencies", tuple),
    ("css_dependencies", tuple),
    ("preference_section", str),
    # fmt: on
)
def sha_sum(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    The file is read in fixed-size chunks so that large plugin resources do
    not have to be loaded into memory at once (the old implementation read
    the whole file in a single ``read()``).
    """
    digest = sha256()
    with open(filename, "rb") as f:
        # iter() with a sentinel yields chunks until read() returns b""
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def sync_resource(base_path, resource_path, name, target_dir, plugin_dir):
    """Copy one plugin resource into *target_dir* and return its web path.

    The copy is skipped when an identical file (same SHA-256) already exists
    at the target.  Aborts the process (exit code 3) when the copy fails.
    """
    source_path = join(base_path, resource_path)
    file_name = basename(source_path)
    target_path = join(target_dir, file_name)
    needs_copy = not exists(target_path) or sha_sum(source_path) != sha_sum(target_path)
    if needs_copy:
        try:
            copyfile(source_path, target_path)
            # copy atime_ns and mtime_ns, so the weak ETags (generated by
            # the HTTP server) do not change
            source_stat = stat(source_path)
            utime(target_path, ns=(source_stat.st_atime_ns, source_stat.st_mtime_ns))
        except IOError:
            logger.critical("failed to copy plugin resource {0} for plugin {1}".format(file_name, name))
            sys.exit(3)
    # returning with the web path of the resource
    return join("plugins/external_plugins", plugin_dir, file_name)
def prepare_package_resources(plugin, plugin_module_name):
    """Copy the JS/CSS dependencies of an external *plugin* into the static
    directory and rewrite the dependency lists to their web paths.

    Files in the target directory that are no longer referenced by the plugin
    are deleted.  Aborts the process (exit code 3) on any I/O failure.
    """
    plugin_base_path = dirname(abspath(plugin.__file__))
    plugin_dir = plugin_module_name
    target_dir = join(settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir)
    try:
        makedirs(target_dir, exist_ok=True)
    except IOError:
        logger.critical("failed to create resource directory {0} for plugin {1}".format(target_dir, plugin_module_name))
        sys.exit(3)

    # basenames of every resource that belongs to the current plugin version
    resources = []

    if hasattr(plugin, "js_dependencies"):
        resources.extend(map(basename, plugin.js_dependencies))
        # replace the module-relative paths with the web paths of the copies
        plugin.js_dependencies = [
            sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir)
            for x in plugin.js_dependencies
        ]
    if hasattr(plugin, "css_dependencies"):
        resources.extend(map(basename, plugin.css_dependencies))
        plugin.css_dependencies = [
            sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir)
            for x in plugin.css_dependencies
        ]

    # delete stale files left over from a previous version of the plugin
    for f in listdir(target_dir):
        if basename(f) not in resources:
            resource_path = join(target_dir, basename(f))
            try:
                remove(resource_path)
            except IOError:
                logger.critical(
                    "failed to remove unused resource file {0} for plugin {1}".format(resource_path, plugin_module_name)
                )
                sys.exit(3)
def load_plugin(plugin_module_name, external):
    """Import the plugin module and validate/normalize its attributes.

    Returns the module object on success, or ``None`` when the plugin raised
    a non-fatal exception while being imported.  Exits the process (code 3)
    on fatal errors such as a missing required attribute.
    """
    # pylint: disable=too-many-branches
    try:
        plugin = import_module(plugin_module_name)
    except (
        SyntaxError,
        KeyboardInterrupt,
        SystemExit,
        SystemError,
        ImportError,
        RuntimeError,
    ) as e:
        logger.critical("%s: fatal exception", plugin_module_name, exc_info=e)
        sys.exit(3)
    except BaseException:
        logger.exception("%s: exception while loading, the plugin is disabled", plugin_module_name)
        return None

    # difference with searx: use module name instead of the user name
    plugin.id = plugin_module_name

    # per-plugin logger, named after the plugin's module
    plugin.logger = getLogger(plugin_module_name)

    for plugin_attr, plugin_attr_type in required_attrs:
        if not hasattr(plugin, plugin_attr):
            logger.critical('%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr)
            sys.exit(3)
        attr = getattr(plugin, plugin_attr)
        if not isinstance(attr, plugin_attr_type):
            # fixed: the old message interpolated plugin and attribute name in
            # swapped ({1}…{0}) order; use lazy %-formatting like the call above
            logger.critical(
                '%s: attribute "%s" is of type %s, must be of type %s, cannot load plugin',
                plugin,
                plugin_attr,
                str(type(attr)),
                plugin_attr_type,
            )
            sys.exit(3)

    for plugin_attr, plugin_attr_type in optional_attrs:
        if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type):
            setattr(plugin, plugin_attr, plugin_attr_type())

    # fixed: the optional_attrs loop above always sets preference_section, so
    # the old ``hasattr`` check was dead code and the "general" default was
    # never applied — fall back when the section is missing OR empty instead
    if not getattr(plugin, "preference_section", None):
        plugin.preference_section = "general"

    # query plugin
    if plugin.preference_section == "query":
        for plugin_attr in ("query_keywords", "query_examples"):
            if not hasattr(plugin, plugin_attr):
                logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin))
                sys.exit(3)

    if settings.get("enabled_plugins"):
        # searx compatibility: plugin.name in settings['enabled_plugins']
        plugin.default_on = plugin.name in settings["enabled_plugins"] or plugin.id in settings["enabled_plugins"]

    # copy resources if this is an external plugin
    if external:
        prepare_package_resources(plugin, plugin_module_name)

    logger.debug("%s: loaded", plugin_module_name)
    return plugin
def load_and_initialize_plugin(plugin_module_name, external, init_args):
    """Load a plugin and, when it defines ``init``, call it with *init_args*.

    Returns the plugin module, or ``None`` when loading failed, ``init``
    returned a falsy value, or ``init`` raised.
    """
    plugin = load_plugin(plugin_module_name, external)
    if not plugin or not hasattr(plugin, 'init'):
        return plugin
    try:
        if plugin.init(*init_args):
            return plugin
        return None
    except Exception:  # pylint: disable=broad-except
        plugin.logger.exception("Exception while calling init, the plugin is disabled")
        return None
class PluginStore:
    """Registry of the loaded plugin modules."""

    def __init__(self):
        # list of loaded plugin modules (see the ``Plugin`` type-hint class)
        self.plugins: 'List[Plugin]' = []

    def __iter__(self):
        return iter(self.plugins)

    def register(self, plugin):
        """Add *plugin* to the store."""
        self.plugins.append(plugin)

    def call(self, ordered_plugin_list, plugin_type, *args, **kwargs):
        """Invoke hook *plugin_type* on each plugin, in order.

        Stops early when a hook returns a falsy value; a hook that raises is
        logged and skipped.  Returns the last hook's return value (True when
        no plugin implements the hook).
        """
        ret = True
        for plugin in ordered_plugin_list:
            if not hasattr(plugin, plugin_type):
                continue
            try:
                ret = getattr(plugin, plugin_type)(*args, **kwargs)
                if not ret:
                    break
            except Exception:  # pylint: disable=broad-except
                plugin.logger.exception("Exception while calling %s", plugin_type)
        return ret


# module-wide singleton, filled by initialize()
plugins = PluginStore()
def plugin_module_names():
    """Yield ``(module_name, is_external)`` for embedded, then external plugins."""
    seen = set()
    # embedded plugins: every module in this package
    for module in iter_modules(path=[dirname(__file__)]):
        seen.add(module.name)
        yield __name__ + "." + module.name, False
    # external plugins from settings.yml, skipping already-seen names
    for module_name in settings['plugins']:
        if module_name in seen:
            continue
        seen.add(module_name)
        yield module_name, True
def initialize(app):
    """Load, initialize and register every embedded and external plugin."""
    for module_name, external in plugin_module_names():
        loaded = load_and_initialize_plugin(module_name, external, (app, settings))
        if loaded is not None:
            plugins.register(loaded)
+29
View File
@@ -0,0 +1,29 @@
'''
SPDX-License-Identifier: AGPL-3.0-or-later
'''
from hashlib import md5
from searx.data import ahmia_blacklist_loader
name = "Ahmia blacklist"
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
default_on = True
preference_section = 'onions'
ahmia_blacklist = None
def on_result(request, search, result):
    """Keep the result unless its onion hostname hash is on the blacklist."""
    if not (result.get('is_onion') and result.get('parsed_url')):
        # non-onion results (or results without a parsed URL) pass through
        return True
    hostname_hash = md5(result['parsed_url'].hostname.encode()).hexdigest()
    return hostname_hash not in ahmia_blacklist
def init(app, settings):
    """Plugin setup hook: load the blacklist.

    Returns False (plugin disabled) when outgoing requests are not routed
    through a Tor proxy; True once the blacklist has been loaded.
    """
    global ahmia_blacklist  # pylint: disable=global-statement
    if not settings['outgoing']['using_tor_proxy']:
        # disable the plugin
        return False
    ahmia_blacklist = ahmia_blacklist_loader()
    return True
+57
View File
@@ -0,0 +1,57 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
(C) 2018, 2020 by Vaclav Zouzalik
'''
from flask_babel import gettext
import hashlib
import re
name = "Hash plugin"
description = gettext("Converts strings to different hash digests.")
default_on = True
preference_section = 'query'
query_keywords = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']
query_examples = 'sha512 The quick brown fox jumps over the lazy dog'
parser_re = re.compile('(md5|sha1|sha224|sha256|sha384|sha512) (.*)', re.I)
def post_search(request, search):
    """Answer ``<algorithm> <text>`` queries with the hex digest of *text*.

    Only runs on the first result page.  Always returns True so the search
    continues; the digest is placed in the result container's answers.
    """
    # process only on first page
    if search.search_query.pageno > 1:
        return True
    m = parser_re.match(search.search_query.query)
    if not m:
        # wrong query
        return True

    function, string = m.groups()
    # fixed idiom: ``string.strip().__len__() == 0`` → truthiness test
    if not string.strip():
        # end if the string is empty
        return True

    # select hash function
    f = hashlib.new(function.lower())

    # make digest from the given string (strip ASCII whitespace bytes)
    f.update(string.encode('utf-8').strip())
    answer = function + " " + gettext('hash digest') + ": " + f.hexdigest()

    # print result
    search.result_container.answers.clear()
    search.result_container.answers['hash'] = {'answer': answer}
    return True
+46
View File
@@ -0,0 +1,46 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
import re
from urllib.parse import urlunparse, urlparse
from searx import settings
from searx.plugins import logger
from flask_babel import gettext
name = gettext('Hostname replace')
description = gettext('Rewrite result hostnames or remove results based on the hostname')
default_on = False
preference_section = 'general'

plugin_id = 'hostname_replace'

# compiled hostname pattern -> replacement string, taken from the
# ``hostname_replace:`` block in settings.yml; an empty replacement means
# "remove results matching this pattern" (see on_result)
replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {}

logger = logger.getChild(plugin_id)

# key of the parsed URL in a result dict
parsed = 'parsed_url'
# additional result URL fields that are also rewritten/removed
_url_fields = ['iframe_src', 'audio_src']
def on_result(request, search, result):
    """Rewrite (or drop) a result whose hostname matches a configured pattern.

    Returns False to remove the result from the result list (pattern with an
    empty replacement), True to keep it — possibly with rewritten URLs.
    """
    for (pattern, replacement) in replacements.items():
        if parsed in result:
            if pattern.search(result[parsed].netloc):
                # to keep or remove this result from the result list depends
                # (only) on the 'parsed_url'
                if not replacement:
                    return False
                result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc))
                result['url'] = urlunparse(result[parsed])

        # additionally rewrite or drop embedded media URLs (iframe/audio)
        for url_field in _url_fields:
            if result.get(url_field):
                url_src = urlparse(result[url_field])
                if pattern.search(url_src.netloc):
                    if not replacement:
                        del result[url_field]
                    else:
                        url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc))
                        result[url_field] = urlunparse(url_src)
    return True
+32
View File
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pyright: basic
"""see :ref:`limiter src`"""
import flask
from searx import redisdb
from searx.plugins import logger
from searx.botdetection import limiter
name = "Request limiter"
description = "Limit the number of request"
default_on = False
preference_section = 'service'
logger = logger.getChild('limiter')
def pre_request():
    """See :ref:`flask.Flask.before_request`

    Runs before every request; per Flask's ``before_request`` contract, a
    non-None return value from ``limiter.filter_request`` is used as the
    response and normal request handling is skipped.
    """
    return limiter.filter_request(flask.request)
def init(app: flask.Flask, settings) -> bool:
    """Hook the limiter into *app* when enabled and Redis is reachable.

    Returns True when the ``pre_request`` hook was registered, False when the
    limiter is disabled in settings or no Redis client is available.
    """
    if not settings['server']['limiter']:
        return False
    if redisdb.client():
        app.before_request(pre_request)
        return True
    logger.error("The limiter requires Redis")
    return False
+47
View File
@@ -0,0 +1,47 @@
from urllib.parse import urlparse, parse_qsl
from flask_babel import gettext
import re
from searx import settings
# DOI pattern: directory indicator "10.", a 4-9 digit registrant code, "/"
# and a suffix of non-whitespace characters
regex = re.compile(r'10\.\d{4,9}/[^\s]+')

name = gettext('Open Access DOI rewrite')
description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available')
default_on = False
preference_section = 'general'
def extract_doi(url):
    """Return the first DOI found in *url*'s path or query values, else None.

    *url* is a ``urllib.parse`` parse result; the path is checked first, then
    each query parameter value in order.
    """
    candidates = [url.path] + [value for _, value in parse_qsl(url.query)]
    for text in candidates:
        found = regex.search(text)
        if found:
            return found.group(0)
    return None
def get_doi_resolver(preferences):
    """Return the URL of the user's DOI resolver, falling back to the default."""
    doi_resolvers = settings['doi_resolvers']
    selected = preferences.get_value('doi_resolver')[0]
    if selected not in doi_resolvers:
        # unknown resolver name in the preferences: use the configured default
        selected = settings['default_doi_resolver']
    return doi_resolvers[selected]
def on_result(request, search, result):
    """Rewrite the result URL to the user's DOI resolver when a DOI is found.

    The DOI is also attached to the result under the 'doi' key when not
    already present.  Always returns True (the result is kept).
    """
    if 'parsed_url' not in result:
        return True

    doi = extract_doi(result['parsed_url'])
    # 50 chars looks like a sanity limit against the open-ended regex
    # over-matching — TODO confirm against real-world DOI lengths
    if doi and len(doi) < 50:
        # strip publisher-specific suffixes so only the bare DOI remains;
        # each suffix in this fixed order is removed at most once
        for suffix in ('/', '.pdf', '.xml', '/full', '/meta', '/abstract'):
            if doi.endswith(suffix):
                doi = doi[: -len(suffix)]
        result['url'] = get_doi_resolver(request.preferences) + doi
        result['parsed_url'] = urlparse(result['url'])
        if 'doi' not in result:
            result['doi'] = doi
    return True
+24
View File
@@ -0,0 +1,24 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
'''
from flask_babel import gettext
# No hook functions are defined in this module — the plugin only contributes
# the metadata below; the behavior itself presumably lives in the theme's
# JavaScript (verify against the static assets).
name = gettext('Search on category select')
description = gettext(
    'Perform search immediately if a category selected. Disable to select multiple categories. (JavaScript required)'
)
default_on = True
preference_section = 'ui'
+30
View File
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring,invalid-name
import re
from flask_babel import gettext
from searx.botdetection._helpers import get_real_ip
name = gettext('Self Information')
description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
default_on = True
preference_section = 'query'
query_keywords = ['user-agent']
query_examples = ''

# Self User Agent regex
# matches any query containing "user agent" or "user-agent" (case-insensitive)
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
def post_search(request, search):
    """Answer 'ip' queries with the client IP, user-agent queries with the UA.

    Only runs on the first result page; always returns True so the search
    continues.
    """
    if search.search_query.pageno > 1:
        return True
    query = search.search_query.query
    if query == 'ip':
        search.result_container.answers['ip'] = {'answer': get_real_ip(request)}
    elif p.match(query):
        search.result_container.answers['user-agent'] = {'answer': request.user_agent}
    return True
+92
View File
@@ -0,0 +1,92 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""A plugin to check if the ip address of the request is a Tor exit-node if the
user searches for ``tor-check``. It fetches the tor exit node list from
https://check.torproject.org/exit-addresses and parses all the IPs into a list,
then checks if the user's IP address is in it.
Enable in ``settings.yml``:
.. code:: yaml
enabled_plugins:
..
- 'Tor check plugin'
"""
import re
from flask_babel import gettext
from httpx import HTTPError
from searx.network import get
default_on = False
name = gettext("Tor check plugin")
'''Translated name of the plugin'''
description = gettext(
"This plugin checks if the address of the request is a Tor exit-node, and"
" informs the user if it is; like check.torproject.org, but from SearXNG."
)
'''Translated description of the plugin.'''
preference_section = 'query'
'''The preference section where the plugin is shown.'''
query_keywords = ['tor-check']
'''Query keywords shown in the preferences.'''
query_examples = ''
'''Query examples shown in the preferences.'''
# Regex for exit node addresses in the list.
reg = re.compile(r"(?<=ExitAddress )\S+")
def post_search(request, search):
    """Answer a literal ``tor-check`` query with whether the request appears
    to come from a Tor exit node (first result page only).

    Downloads the exit-node list on every matching query; on download failure
    an error answer is shown instead.  Always returns True.
    """
    if search.search_query.pageno > 1:
        return True

    if search.search_query.query.lower() == "tor-check":

        # Request the list of tor exit nodes.
        try:
            resp = get("https://check.torproject.org/exit-addresses")
            node_list = re.findall(reg, resp.text)

        except HTTPError:
            # No answer, return error
            search.result_container.answers["tor"] = {
                "answer": gettext(
                    "Could not download the list of Tor exit-nodes from: https://check.torproject.org/exit-addresses"
                )
            }
            return True

        # NOTE(review): trusts the first X-Forwarded-For entry — only safe
        # behind a proxy that sanitizes the header; verify deployment setup
        x_forwarded_for = request.headers.getlist("X-Forwarded-For")

        if x_forwarded_for:
            ip_address = x_forwarded_for[0]
        else:
            ip_address = request.remote_addr

        if ip_address in node_list:
            search.result_container.answers["tor"] = {
                "answer": gettext(
                    "You are using Tor and it looks like you have this external IP address: {ip_address}".format(
                        ip_address=ip_address
                    )
                )
            }
        else:
            search.result_container.answers["tor"] = {
                "answer": gettext(
                    "You are not using Tor and you have this external IP address: {ip_address}".format(
                        ip_address=ip_address
                    )
                )
            }
    return True
+55
View File
@@ -0,0 +1,55 @@
'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
'''
from flask_babel import gettext
import re
from urllib.parse import urlunparse, parse_qsl, urlencode
# Patterns matched against the *name* of each URL query parameter (prefix
# match via ``reg.match``); parameters matching any pattern are stripped
# from result URLs by on_result().
regexes = {
    re.compile(r'utm_[^&]+'),
    re.compile(r'(wkey|wemail)[^&]*'),
    re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'),
    re.compile(r'&$'),
}

name = gettext('Tracker URL remover')
description = gettext('Remove trackers arguments from the returned URL')
default_on = True
preference_section = 'privacy'
def on_result(request, search, result):
    """Strip known tracking parameters (utm_*, HubSpot, wkey/wemail, ...)
    from the result's URL.

    Always returns True — the result itself is kept; its 'parsed_url' and
    'url' are rewritten only when at least one parameter was removed.
    """
    if 'parsed_url' not in result:
        return True

    query = result['parsed_url'].query
    if query == "":
        return True

    parsed_query = parse_qsl(query)
    # keep only parameters whose name matches none of the tracker patterns
    # (replaces the old pop-while-iterating loop with manual index-offset
    # bookkeeping, which also re-serialized the URL once per removed param)
    kept_params = [
        (param_name, value)
        for (param_name, value) in parsed_query
        if not any(reg.match(param_name) for reg in regexes)
    ]
    if len(kept_params) != len(parsed_query):
        result['parsed_url'] = result['parsed_url']._replace(query=urlencode(kept_params))
        result['url'] = urlunparse(result['parsed_url'])
    return True
+10
View File
@@ -0,0 +1,10 @@
from flask_babel import gettext
# No hook functions are defined in this module — the plugin only contributes
# the metadata below; the hotkey handling presumably lives in the theme's
# JavaScript (verify against the static assets).
name = gettext('Vim-like hotkeys')
description = gettext(
    'Navigate search results with Vim-like hotkeys '
    '(JavaScript required). '
    'Press "h" key on main or result page to get help.'
)
default_on = False
preference_section = 'ui'