modified: .gitignore

new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/add-environment-setup-in-conftest.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/add-logging-to-geocode.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/add-logging-to-route_metrics.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/add-logging-to-tracking-simulator.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/extend-sqlite-tuning-in-database.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/fix-route-handling-in-routing.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/handle-api-response-errors-in-routing.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/refactor-database-path-handling-in-database.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/update-fcm-message-construction-in-notifications.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/update-role-check-in-ws.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/logs/refs/heads/codex/update-user-seed-in-database.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/add-environment-setup-in-conftest.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/add-logging-to-geocode.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/add-logging-to-route_metrics.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/add-logging-to-tracking-simulator.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/extend-sqlite-tuning-in-database.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/fix-route-handling-in-routing.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/handle-api-response-errors-in-routing.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/refactor-database-path-handling-in-database.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/update-fcm-message-construction-in-notifications.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/update-role-check-in-ws.py
	new file:   apps/gitea/data/git/repositories/beatzaplenty/limo-booking-app.git/refs/heads/codex/update-user-seed-in-database.py
	renamed:    gitea/docker-compose.yml -> apps/gitea/docker-compose.yml
	new file:   apps/gramps/docker-compose.yml
	renamed:    nextcloud/Dockerfile -> apps/nextcloud/Dockerfile
	new file:   apps/nextcloud/docker-compose.yml
	renamed:    passbolt/Dockerfile -> apps/passbolt/Dockerfile
	renamed:    passbolt/docker-compose.yml -> apps/passbolt/docker-compose.yml
	renamed:    searxng/Dockerfile -> apps/searxng/Dockerfile
	renamed:    searxng/docker-compose.yml -> apps/searxng/docker-compose.yml
	renamed:    searxng/dockerfiles/docker-entrypoint.sh -> apps/searxng/dockerfiles/docker-entrypoint.sh
	renamed:    searxng/docs/conf.py -> apps/searxng/docs/conf.py
	renamed:    searxng/docs/user/.gitignore -> apps/searxng/docs/user/.gitignore
	renamed:    searxng/examples/basic_engine.py -> apps/searxng/examples/basic_engine.py
	renamed:    searxng/searx/__init__.py -> apps/searxng/searx/__init__.py
	renamed:    searxng/searx/answerers/__init__.py -> apps/searxng/searx/answerers/__init__.py
	renamed:    searxng/searx/answerers/random/answerer.py -> apps/searxng/searx/answerers/random/answerer.py
	renamed:    searxng/searx/answerers/statistics/answerer.py -> apps/searxng/searx/answerers/statistics/answerer.py
	renamed:    searxng/searx/autocomplete.py -> apps/searxng/searx/autocomplete.py
	renamed:    searxng/searx/babel_extract.py -> apps/searxng/searx/babel_extract.py
	renamed:    searxng/searx/botdetection/__init__.py -> apps/searxng/searx/botdetection/__init__.py
	renamed:    searxng/searx/botdetection/_helpers.py -> apps/searxng/searx/botdetection/_helpers.py
	renamed:    searxng/searx/botdetection/http_accept.py -> apps/searxng/searx/botdetection/http_accept.py
	renamed:    searxng/searx/botdetection/http_accept_encoding.py -> apps/searxng/searx/botdetection/http_accept_encoding.py
	renamed:    searxng/searx/botdetection/http_accept_language.py -> apps/searxng/searx/botdetection/http_accept_language.py
	renamed:    searxng/searx/botdetection/http_connection.py -> apps/searxng/searx/botdetection/http_connection.py
	renamed:    searxng/searx/botdetection/http_user_agent.py -> apps/searxng/searx/botdetection/http_user_agent.py
	renamed:    searxng/searx/botdetection/ip_limit.py -> apps/searxng/searx/botdetection/ip_limit.py
	renamed:    searxng/searx/botdetection/ip_lists.py -> apps/searxng/searx/botdetection/ip_lists.py
	renamed:    searxng/searx/botdetection/limiter.py -> apps/searxng/searx/botdetection/limiter.py
	renamed:    searxng/searx/botdetection/link_token.py -> apps/searxng/searx/botdetection/link_token.py
	renamed:    searxng/searx/compat.py -> apps/searxng/searx/compat.py
	renamed:    searxng/searx/data/__init__.py -> apps/searxng/searx/data/__init__.py
	renamed:    searxng/searx/enginelib/__init__.py -> apps/searxng/searx/enginelib/__init__.py
	renamed:    searxng/searx/enginelib/traits.py -> apps/searxng/searx/enginelib/traits.py
	renamed:    searxng/searx/engines/1337x.py -> apps/searxng/searx/engines/1337x.py
	renamed:    searxng/searx/engines/9gag.py -> apps/searxng/searx/engines/9gag.py
	renamed:    searxng/searx/engines/__init__.py -> apps/searxng/searx/engines/__init__.py
	renamed:    searxng/searx/engines/ahmia.py -> apps/searxng/searx/engines/ahmia.py
	renamed:    searxng/searx/engines/annas_archive.py -> apps/searxng/searx/engines/annas_archive.py
	renamed:    searxng/searx/engines/apkmirror.py -> apps/searxng/searx/engines/apkmirror.py
	renamed:    searxng/searx/engines/apple_app_store.py -> apps/searxng/searx/engines/apple_app_store.py
	renamed:    searxng/searx/engines/apple_maps.py -> apps/searxng/searx/engines/apple_maps.py
	renamed:    searxng/searx/engines/archlinux.py -> apps/searxng/searx/engines/archlinux.py
	renamed:    searxng/searx/engines/artic.py -> apps/searxng/searx/engines/artic.py
	renamed:    searxng/searx/engines/arxiv.py -> apps/searxng/searx/engines/arxiv.py
	renamed:    searxng/searx/engines/bandcamp.py -> apps/searxng/searx/engines/bandcamp.py
	renamed:    searxng/searx/engines/base.py -> apps/searxng/searx/engines/base.py
	renamed:    searxng/searx/engines/bing.py -> apps/searxng/searx/engines/bing.py
	renamed:    searxng/searx/engines/bing_images.py -> apps/searxng/searx/engines/bing_images.py
	renamed:    searxng/searx/engines/bing_news.py -> apps/searxng/searx/engines/bing_news.py
	renamed:    searxng/searx/engines/bing_videos.py -> apps/searxng/searx/engines/bing_videos.py
	renamed:    searxng/searx/engines/brave.py -> apps/searxng/searx/engines/brave.py
	renamed:    searxng/searx/engines/bt4g.py -> apps/searxng/searx/engines/bt4g.py
	renamed:    searxng/searx/engines/btdigg.py -> apps/searxng/searx/engines/btdigg.py
	renamed:    searxng/searx/engines/command.py -> apps/searxng/searx/engines/command.py
	renamed:    searxng/searx/engines/core.py -> apps/searxng/searx/engines/core.py
	renamed:    searxng/searx/engines/crossref.py -> apps/searxng/searx/engines/crossref.py
	renamed:    searxng/searx/engines/currency_convert.py -> apps/searxng/searx/engines/currency_convert.py
	renamed:    searxng/searx/engines/dailymotion.py -> apps/searxng/searx/engines/dailymotion.py
	renamed:    searxng/searx/engines/deepl.py -> apps/searxng/searx/engines/deepl.py
	renamed:    searxng/searx/engines/deezer.py -> apps/searxng/searx/engines/deezer.py
	renamed:    searxng/searx/engines/demo_offline.py -> apps/searxng/searx/engines/demo_offline.py
	renamed:    searxng/searx/engines/demo_online.py -> apps/searxng/searx/engines/demo_online.py
	renamed:    searxng/searx/engines/deviantart.py -> apps/searxng/searx/engines/deviantart.py
	renamed:    searxng/searx/engines/dictzone.py -> apps/searxng/searx/engines/dictzone.py
	renamed:    searxng/searx/engines/digbt.py -> apps/searxng/searx/engines/digbt.py
	renamed:    searxng/searx/engines/docker_hub.py -> apps/searxng/searx/engines/docker_hub.py
	renamed:    searxng/searx/engines/doku.py -> apps/searxng/searx/engines/doku.py
	renamed:    searxng/searx/engines/duckduckgo.py -> apps/searxng/searx/engines/duckduckgo.py
	renamed:    searxng/searx/engines/duckduckgo_definitions.py -> apps/searxng/searx/engines/duckduckgo_definitions.py
	renamed:    searxng/searx/engines/duckduckgo_images.py -> apps/searxng/searx/engines/duckduckgo_images.py
	renamed:    searxng/searx/engines/duckduckgo_weather.py -> apps/searxng/searx/engines/duckduckgo_weather.py
	renamed:    searxng/searx/engines/duden.py -> apps/searxng/searx/engines/duden.py
	renamed:    searxng/searx/engines/dummy-offline.py -> apps/searxng/searx/engines/dummy-offline.py
	renamed:    searxng/searx/engines/dummy.py -> apps/searxng/searx/engines/dummy.py
	renamed:    searxng/searx/engines/ebay.py -> apps/searxng/searx/engines/ebay.py
	renamed:    searxng/searx/engines/elasticsearch.py -> apps/searxng/searx/engines/elasticsearch.py
	renamed:    searxng/searx/engines/emojipedia.py -> apps/searxng/searx/engines/emojipedia.py
	renamed:    searxng/searx/engines/fdroid.py -> apps/searxng/searx/engines/fdroid.py
	renamed:    searxng/searx/engines/flickr.py -> apps/searxng/searx/engines/flickr.py
	renamed:    searxng/searx/engines/flickr_noapi.py -> apps/searxng/searx/engines/flickr_noapi.py
	renamed:    searxng/searx/engines/framalibre.py -> apps/searxng/searx/engines/framalibre.py
	renamed:    searxng/searx/engines/freesound.py -> apps/searxng/searx/engines/freesound.py
	renamed:    searxng/searx/engines/frinkiac.py -> apps/searxng/searx/engines/frinkiac.py
	renamed:    searxng/searx/engines/genius.py -> apps/searxng/searx/engines/genius.py
	renamed:    searxng/searx/engines/gentoo.py -> apps/searxng/searx/engines/gentoo.py
	renamed:    searxng/searx/engines/github.py -> apps/searxng/searx/engines/github.py
	renamed:    searxng/searx/engines/google.py -> apps/searxng/searx/engines/google.py
	renamed:    searxng/searx/engines/google_images.py -> apps/searxng/searx/engines/google_images.py
	renamed:    searxng/searx/engines/google_news.py -> apps/searxng/searx/engines/google_news.py
	renamed:    searxng/searx/engines/google_play.py -> apps/searxng/searx/engines/google_play.py
	renamed:    searxng/searx/engines/google_scholar.py -> apps/searxng/searx/engines/google_scholar.py
	renamed:    searxng/searx/engines/google_videos.py -> apps/searxng/searx/engines/google_videos.py
	renamed:    searxng/searx/engines/imdb.py -> apps/searxng/searx/engines/imdb.py
	renamed:    searxng/searx/engines/ina.py -> apps/searxng/searx/engines/ina.py
	renamed:    searxng/searx/engines/invidious.py -> apps/searxng/searx/engines/invidious.py
	renamed:    searxng/searx/engines/jisho.py -> apps/searxng/searx/engines/jisho.py
	renamed:    searxng/searx/engines/json_engine.py -> apps/searxng/searx/engines/json_engine.py
	renamed:    searxng/searx/engines/kickass.py -> apps/searxng/searx/engines/kickass.py
	renamed:    searxng/searx/engines/lemmy.py -> apps/searxng/searx/engines/lemmy.py
	renamed:    searxng/searx/engines/lingva.py -> apps/searxng/searx/engines/lingva.py
	renamed:    searxng/searx/engines/loc.py -> apps/searxng/searx/engines/loc.py
	renamed:    searxng/searx/engines/mediathekviewweb.py -> apps/searxng/searx/engines/mediathekviewweb.py
	renamed:    searxng/searx/engines/mediawiki.py -> apps/searxng/searx/engines/mediawiki.py
	renamed:    searxng/searx/engines/meilisearch.py -> apps/searxng/searx/engines/meilisearch.py
	renamed:    searxng/searx/engines/metacpan.py -> apps/searxng/searx/engines/metacpan.py
	renamed:    searxng/searx/engines/mixcloud.py -> apps/searxng/searx/engines/mixcloud.py
	renamed:    searxng/searx/engines/mongodb.py -> apps/searxng/searx/engines/mongodb.py
	renamed:    searxng/searx/engines/mysql_server.py -> apps/searxng/searx/engines/mysql_server.py
	renamed:    searxng/searx/engines/nyaa.py -> apps/searxng/searx/engines/nyaa.py
	renamed:    searxng/searx/engines/opensemantic.py -> apps/searxng/searx/engines/opensemantic.py
	renamed:    searxng/searx/engines/openstreetmap.py -> apps/searxng/searx/engines/openstreetmap.py
	renamed:    searxng/searx/engines/openverse.py -> apps/searxng/searx/engines/openverse.py
	renamed:    searxng/searx/engines/pdbe.py -> apps/searxng/searx/engines/pdbe.py
	renamed:    searxng/searx/engines/peertube.py -> apps/searxng/searx/engines/peertube.py
	renamed:    searxng/searx/engines/photon.py -> apps/searxng/searx/engines/photon.py
	renamed:    searxng/searx/engines/piped.py -> apps/searxng/searx/engines/piped.py
	renamed:    searxng/searx/engines/piratebay.py -> apps/searxng/searx/engines/piratebay.py
	renamed:    searxng/searx/engines/postgresql.py -> apps/searxng/searx/engines/postgresql.py
	renamed:    searxng/searx/engines/pubmed.py -> apps/searxng/searx/engines/pubmed.py
	renamed:    searxng/searx/engines/qwant.py -> apps/searxng/searx/engines/qwant.py
	renamed:    searxng/searx/engines/recoll.py -> apps/searxng/searx/engines/recoll.py
	renamed:    searxng/searx/engines/reddit.py -> apps/searxng/searx/engines/reddit.py
	renamed:    searxng/searx/engines/redis_server.py -> apps/searxng/searx/engines/redis_server.py
	renamed:    searxng/searx/engines/rumble.py -> apps/searxng/searx/engines/rumble.py
	renamed:    searxng/searx/engines/scanr_structures.py -> apps/searxng/searx/engines/scanr_structures.py
	renamed:    searxng/searx/engines/searchcode_code.py -> apps/searxng/searx/engines/searchcode_code.py
	renamed:    searxng/searx/engines/searx_engine.py -> apps/searxng/searx/engines/searx_engine.py
	renamed:    searxng/searx/engines/semantic_scholar.py -> apps/searxng/searx/engines/semantic_scholar.py
	renamed:    searxng/searx/engines/sepiasearch.py -> apps/searxng/searx/engines/sepiasearch.py
	renamed:    searxng/searx/engines/seznam.py -> apps/searxng/searx/engines/seznam.py
	renamed:    searxng/searx/engines/sjp.py -> apps/searxng/searx/engines/sjp.py
	renamed:    searxng/searx/engines/solidtorrents.py -> apps/searxng/searx/engines/solidtorrents.py
	renamed:    searxng/searx/engines/solr.py -> apps/searxng/searx/engines/solr.py
	renamed:    searxng/searx/engines/soundcloud.py -> apps/searxng/searx/engines/soundcloud.py
	renamed:    searxng/searx/engines/spotify.py -> apps/searxng/searx/engines/spotify.py
	renamed:    searxng/searx/engines/springer.py -> apps/searxng/searx/engines/springer.py
	renamed:    searxng/searx/engines/sqlite.py -> apps/searxng/searx/engines/sqlite.py
	renamed:    searxng/searx/engines/stackexchange.py -> apps/searxng/searx/engines/stackexchange.py
	renamed:    searxng/searx/engines/startpage.py -> apps/searxng/searx/engines/startpage.py
	renamed:    searxng/searx/engines/tagesschau.py -> apps/searxng/searx/engines/tagesschau.py
	renamed:    searxng/searx/engines/tineye.py -> apps/searxng/searx/engines/tineye.py
	renamed:    searxng/searx/engines/tokyotoshokan.py -> apps/searxng/searx/engines/tokyotoshokan.py
	renamed:    searxng/searx/engines/torznab.py -> apps/searxng/searx/engines/torznab.py
	renamed:    searxng/searx/engines/translated.py -> apps/searxng/searx/engines/translated.py
	renamed:    searxng/searx/engines/twitter.py -> apps/searxng/searx/engines/twitter.py
	renamed:    searxng/searx/engines/unsplash.py -> apps/searxng/searx/engines/unsplash.py
	renamed:    searxng/searx/engines/vimeo.py -> apps/searxng/searx/engines/vimeo.py
	renamed:    searxng/searx/engines/wikidata.py -> apps/searxng/searx/engines/wikidata.py
	renamed:    searxng/searx/engines/wikipedia.py -> apps/searxng/searx/engines/wikipedia.py
	renamed:    searxng/searx/engines/wolframalpha_api.py -> apps/searxng/searx/engines/wolframalpha_api.py
	renamed:    searxng/searx/engines/wolframalpha_noapi.py -> apps/searxng/searx/engines/wolframalpha_noapi.py
	renamed:    searxng/searx/engines/wordnik.py -> apps/searxng/searx/engines/wordnik.py
	renamed:    searxng/searx/engines/wttr.py -> apps/searxng/searx/engines/wttr.py
	renamed:    searxng/searx/engines/www1x.py -> apps/searxng/searx/engines/www1x.py
	renamed:    searxng/searx/engines/xpath.py -> apps/searxng/searx/engines/xpath.py
	renamed:    searxng/searx/engines/yacy.py -> apps/searxng/searx/engines/yacy.py
	renamed:    searxng/searx/engines/yahoo.py -> apps/searxng/searx/engines/yahoo.py
	renamed:    searxng/searx/engines/yahoo_news.py -> apps/searxng/searx/engines/yahoo_news.py
	renamed:    searxng/searx/engines/youtube_api.py -> apps/searxng/searx/engines/youtube_api.py
	renamed:    searxng/searx/engines/youtube_noapi.py -> apps/searxng/searx/engines/youtube_noapi.py
	renamed:    searxng/searx/engines/zlibrary.py -> apps/searxng/searx/engines/zlibrary.py
	renamed:    searxng/searx/exceptions.py -> apps/searxng/searx/exceptions.py
	renamed:    searxng/searx/external_bang.py -> apps/searxng/searx/external_bang.py
	renamed:    searxng/searx/external_urls.py -> apps/searxng/searx/external_urls.py
	renamed:    searxng/searx/flaskfix.py -> apps/searxng/searx/flaskfix.py
	renamed:    searxng/searx/infopage/__init__.py -> apps/searxng/searx/infopage/__init__.py
	renamed:    searxng/searx/locales.py -> apps/searxng/searx/locales.py
	renamed:    searxng/searx/metrics/__init__.py -> apps/searxng/searx/metrics/__init__.py
	renamed:    searxng/searx/metrics/error_recorder.py -> apps/searxng/searx/metrics/error_recorder.py
	renamed:    searxng/searx/metrics/models.py -> apps/searxng/searx/metrics/models.py
	renamed:    searxng/searx/network/__init__.py -> apps/searxng/searx/network/__init__.py
	renamed:    searxng/searx/network/client.py -> apps/searxng/searx/network/client.py
	renamed:    searxng/searx/network/network.py -> apps/searxng/searx/network/network.py
	renamed:    searxng/searx/network/raise_for_httperror.py -> apps/searxng/searx/network/raise_for_httperror.py
	renamed:    searxng/searx/plugins/__init__.py -> apps/searxng/searx/plugins/__init__.py
	renamed:    searxng/searx/plugins/ahmia_filter.py -> apps/searxng/searx/plugins/ahmia_filter.py
	renamed:    searxng/searx/plugins/hash_plugin.py -> apps/searxng/searx/plugins/hash_plugin.py
	renamed:    searxng/searx/plugins/hostname_replace.py -> apps/searxng/searx/plugins/hostname_replace.py
	renamed:    searxng/searx/plugins/limiter.py -> apps/searxng/searx/plugins/limiter.py
	renamed:    searxng/searx/plugins/oa_doi_rewrite.py -> apps/searxng/searx/plugins/oa_doi_rewrite.py
	renamed:    searxng/searx/plugins/search_on_category_select.py -> apps/searxng/searx/plugins/search_on_category_select.py
	renamed:    searxng/searx/plugins/self_info.py -> apps/searxng/searx/plugins/self_info.py
	renamed:    searxng/searx/plugins/tor_check.py -> apps/searxng/searx/plugins/tor_check.py
	renamed:    searxng/searx/plugins/tracker_url_remover.py -> apps/searxng/searx/plugins/tracker_url_remover.py
	renamed:    searxng/searx/plugins/vim_hotkeys.py -> apps/searxng/searx/plugins/vim_hotkeys.py
	renamed:    searxng/searx/preferences.py -> apps/searxng/searx/preferences.py
	renamed:    searxng/searx/query.py -> apps/searxng/searx/query.py
	renamed:    searxng/searx/redisdb.py -> apps/searxng/searx/redisdb.py
	renamed:    searxng/searx/redislib.py -> apps/searxng/searx/redislib.py
	renamed:    searxng/searx/results.py -> apps/searxng/searx/results.py
	renamed:    searxng/searx/search/__init__.py -> apps/searxng/searx/search/__init__.py
	renamed:    searxng/searx/search/checker/__init__.py -> apps/searxng/searx/search/checker/__init__.py
	renamed:    searxng/searx/search/checker/__main__.py -> apps/searxng/searx/search/checker/__main__.py
	renamed:    searxng/searx/search/checker/background.py -> apps/searxng/searx/search/checker/background.py
	renamed:    searxng/searx/search/checker/impl.py -> apps/searxng/searx/search/checker/impl.py
	renamed:    searxng/searx/search/checker/scheduler.py -> apps/searxng/searx/search/checker/scheduler.py
	renamed:    searxng/searx/search/models.py -> apps/searxng/searx/search/models.py
	renamed:    searxng/searx/search/processors/__init__.py -> apps/searxng/searx/search/processors/__init__.py
	renamed:    searxng/searx/search/processors/abstract.py -> apps/searxng/searx/search/processors/abstract.py
	renamed:    searxng/searx/search/processors/offline.py -> apps/searxng/searx/search/processors/offline.py
	renamed:    searxng/searx/search/processors/online.py -> apps/searxng/searx/search/processors/online.py
	renamed:    searxng/searx/search/processors/online_currency.py -> apps/searxng/searx/search/processors/online_currency.py
	renamed:    searxng/searx/search/processors/online_dictionary.py -> apps/searxng/searx/search/processors/online_dictionary.py
	renamed:    searxng/searx/search/processors/online_url_search.py -> apps/searxng/searx/search/processors/online_url_search.py
	renamed:    searxng/searx/settings.yml -> apps/searxng/searx/settings.yml
	renamed:    searxng/searx/settings_defaults.py -> apps/searxng/searx/settings_defaults.py
	renamed:    searxng/searx/settings_loader.py -> apps/searxng/searx/settings_loader.py
	renamed:    searxng/searx/static/plugins/external_plugins/.gitignore -> apps/searxng/searx/static/plugins/external_plugins/.gitignore
	renamed:    searxng/searx/static/themes/simple/.gitattributes -> apps/searxng/searx/static/themes/simple/.gitattributes
	renamed:    searxng/searx/static/themes/simple/.gitignore -> apps/searxng/searx/static/themes/simple/.gitignore
	renamed:    searxng/searx/sxng_locales.py -> apps/searxng/searx/sxng_locales.py
	renamed:    searxng/searx/tools/__init__.py -> apps/searxng/searx/tools/__init__.py
	renamed:    searxng/searx/tools/config.py -> apps/searxng/searx/tools/config.py
	renamed:    searxng/searx/unixthreadname.py -> apps/searxng/searx/unixthreadname.py
	renamed:    searxng/searx/utils.py -> apps/searxng/searx/utils.py
	renamed:    searxng/searx/version.py -> apps/searxng/searx/version.py
	renamed:    searxng/searx/webadapter.py -> apps/searxng/searx/webadapter.py
	renamed:    searxng/searx/webapp.py -> apps/searxng/searx/webapp.py
	renamed:    searxng/searx/webutils.py -> apps/searxng/searx/webutils.py
	renamed:    searxng/searxng_extra/__init__.py -> apps/searxng/searxng_extra/__init__.py
	renamed:    searxng/searxng_extra/standalone_searx.py -> apps/searxng/searxng_extra/standalone_searx.py
	renamed:    searxng/searxng_extra/update/__init__.py -> apps/searxng/searxng_extra/update/__init__.py
	renamed:    searxng/searxng_extra/update/update_ahmia_blacklist.py -> apps/searxng/searxng_extra/update/update_ahmia_blacklist.py
	renamed:    searxng/searxng_extra/update/update_currencies.py -> apps/searxng/searxng_extra/update/update_currencies.py
	renamed:    searxng/searxng_extra/update/update_engine_descriptions.py -> apps/searxng/searxng_extra/update/update_engine_descriptions.py
	renamed:    searxng/searxng_extra/update/update_engine_traits.py -> apps/searxng/searxng_extra/update/update_engine_traits.py
	renamed:    searxng/searxng_extra/update/update_external_bangs.py -> apps/searxng/searxng_extra/update/update_external_bangs.py
	renamed:    searxng/searxng_extra/update/update_firefox_version.py -> apps/searxng/searxng_extra/update/update_firefox_version.py
	renamed:    searxng/searxng_extra/update/update_osm_keys_tags.py -> apps/searxng/searxng_extra/update/update_osm_keys_tags.py
	renamed:    searxng/searxng_extra/update/update_pygments.py -> apps/searxng/searxng_extra/update/update_pygments.py
	renamed:    searxng/searxng_extra/update/update_wikidata_units.py -> apps/searxng/searxng_extra/update/update_wikidata_units.py
	renamed:    searxng/setup.py -> apps/searxng/setup.py
	renamed:    searxng/tests/__init__.py -> apps/searxng/tests/__init__.py
	renamed:    searxng/tests/robot/__init__.py -> apps/searxng/tests/robot/__init__.py
	renamed:    searxng/tests/robot/__main__.py -> apps/searxng/tests/robot/__main__.py
	renamed:    searxng/tests/robot/settings_robot.yml -> apps/searxng/tests/robot/settings_robot.yml
	renamed:    searxng/tests/robot/test_webapp.py -> apps/searxng/tests/robot/test_webapp.py
	renamed:    searxng/tests/unit/__init__.py -> apps/searxng/tests/unit/__init__.py
	renamed:    searxng/tests/unit/engines/test_command.py -> apps/searxng/tests/unit/engines/test_command.py
	renamed:    searxng/tests/unit/engines/test_xpath.py -> apps/searxng/tests/unit/engines/test_xpath.py
	renamed:    searxng/tests/unit/network/__init__.py -> apps/searxng/tests/unit/network/__init__.py
	renamed:    searxng/tests/unit/network/test_network.py -> apps/searxng/tests/unit/network/test_network.py
	renamed:    searxng/tests/unit/settings/empty_settings.yml -> apps/searxng/tests/unit/settings/empty_settings.yml
	renamed:    searxng/tests/unit/settings/syntaxerror_settings.yml -> apps/searxng/tests/unit/settings/syntaxerror_settings.yml
	renamed:    searxng/tests/unit/settings/test_settings.yml -> apps/searxng/tests/unit/settings/test_settings.yml
	renamed:    searxng/tests/unit/settings/user_settings.yml -> apps/searxng/tests/unit/settings/user_settings.yml
	renamed:    searxng/tests/unit/settings/user_settings_keep_only.yml -> apps/searxng/tests/unit/settings/user_settings_keep_only.yml
	renamed:    searxng/tests/unit/settings/user_settings_remove.yml -> apps/searxng/tests/unit/settings/user_settings_remove.yml
	renamed:    searxng/tests/unit/settings/user_settings_remove2.yml -> apps/searxng/tests/unit/settings/user_settings_remove2.yml
	renamed:    searxng/tests/unit/settings/user_settings_simple.yml -> apps/searxng/tests/unit/settings/user_settings_simple.yml
	renamed:    searxng/tests/unit/test_answerers.py -> apps/searxng/tests/unit/test_answerers.py
	renamed:    searxng/tests/unit/test_engines_init.py -> apps/searxng/tests/unit/test_engines_init.py
	renamed:    searxng/tests/unit/test_exceptions.py -> apps/searxng/tests/unit/test_exceptions.py
	renamed:    searxng/tests/unit/test_external_bangs.py -> apps/searxng/tests/unit/test_external_bangs.py
	renamed:    searxng/tests/unit/test_locales.py -> apps/searxng/tests/unit/test_locales.py
	renamed:    searxng/tests/unit/test_plugins.py -> apps/searxng/tests/unit/test_plugins.py
	renamed:    searxng/tests/unit/test_preferences.py -> apps/searxng/tests/unit/test_preferences.py
	renamed:    searxng/tests/unit/test_query.py -> apps/searxng/tests/unit/test_query.py
	renamed:    searxng/tests/unit/test_results.py -> apps/searxng/tests/unit/test_results.py
	renamed:    searxng/tests/unit/test_search.py -> apps/searxng/tests/unit/test_search.py
	renamed:    searxng/tests/unit/test_settings_loader.py -> apps/searxng/tests/unit/test_settings_loader.py
	renamed:    searxng/tests/unit/test_utils.py -> apps/searxng/tests/unit/test_utils.py
	renamed:    searxng/tests/unit/test_webadapter.py -> apps/searxng/tests/unit/test_webadapter.py
	renamed:    searxng/tests/unit/test_webapp.py -> apps/searxng/tests/unit/test_webapp.py
	renamed:    searxng/tests/unit/test_webutils.py -> apps/searxng/tests/unit/test_webutils.py
	renamed:    searxng/utils/build_env.py -> apps/searxng/utils/build_env.py
	renamed:    searxng/utils/filtron.sh -> apps/searxng/utils/filtron.sh
	renamed:    searxng/utils/lib.sh -> apps/searxng/utils/lib.sh
	renamed:    searxng/utils/lib_go.sh -> apps/searxng/utils/lib_go.sh
	renamed:    searxng/utils/lib_nvm.sh -> apps/searxng/utils/lib_nvm.sh
	renamed:    searxng/utils/lib_redis.sh -> apps/searxng/utils/lib_redis.sh
	renamed:    searxng/utils/lib_sxng_data.sh -> apps/searxng/utils/lib_sxng_data.sh
	renamed:    searxng/utils/lib_sxng_node.sh -> apps/searxng/utils/lib_sxng_node.sh
	renamed:    searxng/utils/lib_sxng_static.sh -> apps/searxng/utils/lib_sxng_static.sh
	renamed:    searxng/utils/lib_sxng_test.sh -> apps/searxng/utils/lib_sxng_test.sh
	renamed:    searxng/utils/lib_sxng_themes.sh -> apps/searxng/utils/lib_sxng_themes.sh
	renamed:    searxng/utils/lib_sxng_weblate.sh -> apps/searxng/utils/lib_sxng_weblate.sh
	renamed:    searxng/utils/lxc.sh -> apps/searxng/utils/lxc.sh
	renamed:    searxng/utils/morty.sh -> apps/searxng/utils/morty.sh
	renamed:    searxng/utils/searx.sh -> apps/searxng/utils/searx.sh
	renamed:    searxng/utils/searxng.sh -> apps/searxng/utils/searxng.sh
	renamed:    searxng/utils/searxng_check.py -> apps/searxng/utils/searxng_check.py
	renamed:    searxng/utils/templates/etc/searxng/settings.yml -> apps/searxng/utils/templates/etc/searxng/settings.yml
	new file:   apps/shift-recorder
	new file:   apps/stockfill
	new file:   core/authelia/configuration.yml
	new file:   core/authelia/users_database.yml
	new file:   core/crowdsec/Dockerfile
	new file:   core/crowdsec/data/detect.yaml
	new file:   core/docker-compose.yml
	new file:   core/test/Dockerfile
	new file:   core/test/docker-compose.yml
	new file:   core/test/exporter.py
	new file:   core/traefik/data/dynamic.yaml
	renamed:    traefik/data/plugins.yaml -> core/traefik/data/plugins.yaml
	new file:   core/traefik/dynamic.yml
	new file:   core/traefik/traefik.yml
	new file:   default-network.yml
	new file:   monitoring/docker-exporter/Dockerfile
	new file:   monitoring/docker-exporter/exporter.py
	new file:   monitoring/gotify/docker-compose.yml
	new file:   monitoring/gotify/docker-health-to-gotify.sh
	new file:   monitoring/grafana/docker-compose.yml
	new file:   monitoring/node-red/Dockerfile
	new file:   monitoring/node-red/data/test-container.sh
	new file:   monitoring/node-red/docker-compose.yml
	new file:   monitoring/portainer/docker-compose.yml
	new file:   monitoring/prometheus/docker-compose.yml
	new file:   monitoring/prometheus/prometheus.yml
	new file:   monitoring/prometheus/rules/alerts.yml
	new file:   monitoring/uptime-kuma/docker-compose.yml
	deleted:    nextcloud/docker-compose.yml
	new file:   services-up.sh
	deleted:    traefik/docker-compose.yml
	deleted:    traefik/traefik.Dockerfile
	modified:   update-containers.py
	modified:   update-containers.sh

	modified:   apps/shift-recorder (modified content)
	modified:   apps/stockfill (modified content)
This commit is contained in:
git
2026-03-31 19:59:49 +10:00
parent d5b6cb22cd
commit b71cd3fcbb
340 changed files with 2084 additions and 311 deletions
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 8af5ca464225c888f5438a0fd226937e2ccabca4 Gitea <gitea@fake.local> 1757451832 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 bb76e891d59a88beeb4f5b233cbecfd94a8f0cae Gitea <gitea@fake.local> 1756461833 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 f5998826f6b9ed820f5a97a67ce11402e72fd2f3 Gitea <gitea@fake.local> 1756461833 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 791c61ee8129113a02d67ffa171ed48843cdf025 Gitea <gitea@fake.local> 1756815232 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 e76063242b47225e6614f17749b1bf2ba0b70ac9 Gitea <gitea@fake.local> 1757407432 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 1c93f2ab9ceef7601c5db93de0ddb057aef5b4b1 Gitea <gitea@fake.local> 1756433632 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 0404d47f0ba21269865f307f3fd53e746ff155a4 Gitea <gitea@fake.local> 1756436033 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 80111cd7579abc6319f5d357da060db8186babaf Gitea <gitea@fake.local> 1758786607 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 0fea5ebd8be8d93f95630bdc5cc9ecc0b0bbac43 Gitea <gitea@fake.local> 1756949032 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 7b87a0e2a6c03e5344da2fe6a391c1f1fb269b5c Gitea <gitea@fake.local> 1756851832 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
0000000000000000000000000000000000000000 bde15e4b736be753b1272a3277f528d7eb75d371 Gitea <gitea@fake.local> 1756508033 +0000 fetch --prune --tags origin: storing head
@@ -0,0 +1 @@
8af5ca464225c888f5438a0fd226937e2ccabca4
@@ -0,0 +1 @@
bb76e891d59a88beeb4f5b233cbecfd94a8f0cae
@@ -0,0 +1 @@
f5998826f6b9ed820f5a97a67ce11402e72fd2f3
@@ -0,0 +1 @@
791c61ee8129113a02d67ffa171ed48843cdf025
@@ -0,0 +1 @@
e76063242b47225e6614f17749b1bf2ba0b70ac9
@@ -0,0 +1 @@
1c93f2ab9ceef7601c5db93de0ddb057aef5b4b1
@@ -0,0 +1 @@
7b87a0e2a6c03e5344da2fe6a391c1f1fb269b5c
@@ -0,0 +1 @@
bde15e4b736be753b1272a3277f528d7eb75d371
+39
View File
@@ -0,0 +1,39 @@
# Gitea service, published through Traefik as gitea.lan.ddnsgeek.com.
# Started with the "apps", "all" or "gitea" compose profile.
services:
gitea:
profiles: ["apps","all","gitea"]
container_name: gitea
image: gitea/gitea:latest # change to 1-rootless once find out how to move data.
restart: always
environment:
- USER_UID=1000
- USER_GID=1000
- GITEA__database__DB_TYPE=sqlite3
- GITEA__server__ROOT_URL=https://gitea.lan.ddnsgeek.com/
volumes:
# All Gitea state lives in this bind mount.
- ${PROJECT_ROOT}/apps/gitea/data:/data
networks:
# NOTE(review): "traefik" is not declared in this file (the declaration at
# the bottom is commented out); presumably another compose file provides it.
- traefik
labels:
- "traefik.enable=true"
- "traefik.http.routers.gitea.rule=Host(`gitea.lan.ddnsgeek.com`)"
- "traefik.http.routers.gitea.entrypoints=websecure"
- "traefik.http.routers.gitea.tls=true"
- "traefik.http.routers.gitea.tls.certresolver=myresolver"
- "traefik.http.services.gitea.loadbalancer.server.port=3000"
- "io.portainer.accesscontrol.public"
- "traefik.docker.network=core_traefik"
healthcheck:
# Healthy once /api/healthz answers on port 3000.
# Assumes curl is available inside the image - TODO confirm.
test: ["CMD-SHELL", "curl -fsS http://localhost:3000/api/healthz >/dev/null"]
interval: 30s
timeout: 5s
retries: 6
start_period: 120s
#volumes:
# gitea_data:
#networks:
# traefik_reverse_proxy:
# external: true
+71
View File
@@ -0,0 +1,71 @@
# Gramps Web (family tree) together with its PostgreSQL database.
# Both services start with the "apps", "all" or "gramps" compose profile.
services:
  gramps-db:
    profiles: ["apps","all","gramps"]
    image: postgres:13
    container_name: gramps-db
    restart: always
    environment:
      # NOTE(review): credentials are committed in plain text; consider an env file.
      POSTGRES_USER: gramps
      POSTGRES_PASSWORD: grampspassword
      POSTGRES_DB: gramps
    volumes:
      # NOTE(review): postgres:13 presumably keeps its cluster in
      # /var/lib/postgresql/data (declared as a volume by the image); verify
      # that the data really ends up in this bind mount.
      - ${PROJECT_ROOT}/apps/gramps/db:/var/lib/postgresql
    networks:
      - gramps
    healthcheck:
      # Probe the server from inside its own container. The former "-h db"
      # named a host that no service or container in this file provides, so
      # the check could never succeed.
      test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -p 5432 -U gramps -d gramps"]
      interval: 10s
      timeout: 5s
      retries: 12
      start_period: 30s
  grampsweb:
    profiles: ["apps","all","gramps"]
    image: ghcr.io/gramps-project/grampsweb:latest
    container_name: gramps-web
    depends_on:
      - gramps-db
    restart: always
    # ports:
    #   - "5000:5000" # access via http://localhost:5000
    environment:
      # The database is reachable under its service name on the "gramps"
      # network (was "db", which does not exist in this project).
      DB_URI: postgresql://gramps:grampspassword@gramps-db:5432/gramps
      GRAMPSWEB_LOGLEVEL: INFO
      # default admin user created on first run:
      # NOTE(review): admin/admin is a well-known default; change after first login.
      INITIAL_ADMIN: admin
      INITIAL_ADMIN_PASSWORD: admin
      # optional: storage paths inside container
      GRAMPSWEB_MEDIAPATH: /app/media
      GRAMPSWEB_TREE: "main"
    volumes:
      - ${PROJECT_ROOT}/apps/gramps/data/users:/app/users
      - ${PROJECT_ROOT}/apps/gramps/data/media:/app/media
      - ${PROJECT_ROOT}/apps/gramps/data/cache:/app/cache
    labels:
      - "traefik.http.routers.gramps.rule=Host(`familytree.lan.ddnsgeek.com`)"
      - "traefik.enable=true"
      - "traefik.http.routers.gramps.entrypoints=websecure"
      - "traefik.http.routers.gramps.tls.certresolver=myresolver"
      - "io.portainer.accesscontrol.public"
      - "traefik.http.services.gramps.loadbalancer.server.port=5000"
      - "traefik.docker.network=core_traefik"
    networks:
      - traefik
      - gramps
    healthcheck:
      # Healthy once the web UI answers on port 5000.
      test:
        - CMD-SHELL
        - wget -qO- http://127.0.0.1:5000/ >/dev/null
      interval: 30s
      timeout: 5s
      retries: 6
      start_period: 60s
networks:
  # traefik_reverse_proxy:
  #   external: true
  gramps:
  #   driver: bridge
+19
View File
@@ -0,0 +1,19 @@
# Nextcloud production image with ffmpeg added.
FROM nextcloud:production
# NOTE(review): the commented-out "doods" lines below look like leftovers
# copied from another image; confirm before removing them.
#RUN groupadd -r doods && useradd -m -s /bin/bash -d /opt/doods -g doods doods
#RUN chsh -s /usr/sbin/nologin root
#RUN chown -R doods:doods /opt/doods
#ENV PATH "${PATH}:/opt/doods"
#ENV HOME /opt/doods
#USER root
# Install ffmpeg and drop the apt caches in the same layer to keep it small.
RUN apt-get update && \
apt-get install -y ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
#USER www-data
+130
View File
@@ -0,0 +1,130 @@
# Nextcloud stack: web application (custom image built from
# ${PROJECT_ROOT}/apps/nextcloud), MariaDB and Redis.
# All three services start with the "apps", "all" or "nextcloud" profile.
# NOTE(review): database, SMTP and Redis passwords are committed in plain
# text throughout this file; rotate them and move them to an env file.
services:
nextcloud-webapp:
# image: nextcloud:production
profiles: ["apps","all","nextcloud"]
build:
context: ${PROJECT_ROOT}/apps/nextcloud
container_name: nextcloud-webapp
restart: always
hostname: nextcloud.lan.ddnsgeek.com
volumes:
- ${PROJECT_ROOT}/apps/nextcloud/data:/var/www/html/data:rw
- ${PROJECT_ROOT}/apps/nextcloud/config:/var/www/html/config:rw
# NOTE(review): "/tmp:exec" is given as the mount target here; if the intent
# was a tmpfs on /tmp with the exec option, this form probably does not
# express it - verify.
- type: tmpfs
target: /tmp:exec
depends_on:
- nextcloud-db
- nextcloud-redis
environment:
- MYSQL_PASSWORD=R1m@dmin
- MYSQL_DATABASE=nextcloud
- MYSQL_USER=nextcloud
# Matches the "hostname" set on the nextcloud-db service below.
- MYSQL_HOST=nextcloud_db:3306
- NEXTCLOUD_TRUSTED_DOMAINS=nextcloud.lan.ddnsgeek.com
- OVERWRITEPROTOCOL=https
- OVERWRITECLIURL=https://nextcloud.lan.ddnsgeek.com
- SMTP_HOST=smtp.gmail.com
- SMTP_SECURE=tls
- SMTP_PORT=587
- SMTP_AUTHTYPE=login
- MAIL_FROM_ADDRESS=beatz174
- MAIL_DOMAIN=gmail.com
- SMTP_NAME=beatz174@gmail.com
- SMTP_PASSWORD=kqdw fvml wlag ldgv
# Matches the "hostname" set on the nextcloud-redis service below.
- REDIS_HOST=redis
- REDIS_HOST_PORT=6379
- REDIS_HOST_PASSWORD=TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n
networks:
- traefik
- nextcloud
labels:
- "traefik.http.routers.nextcloud.rule=Host(`nextcloud.lan.ddnsgeek.com`)"
- "traefik.enable=true"
- "traefik.http.routers.nextcloud.entrypoints=websecure"
- "traefik.http.routers.nextcloud.tls.certresolver=myresolver"
- "io.portainer.accesscontrol.public"
# NOTE(review): the nextcloud-nodeinfo middleware defined below is not listed
# on the router, so only the dav and webfinger rewrites are applied.
- "traefik.http.routers.nextcloud.middlewares=nextcloud-dav, nextcloud-webfinger"
- "traefik.http.middlewares.nextcloud-dav.replacepathregex.regex=^/.well-known/ca(l|rd)dav"
- "traefik.http.middlewares.nextcloud-dav.replacepathregex.replacement=/remote.php/dav/"
- "traefik.http.middlewares.nextcloud-nodeinfo.replacepathregex.regex=^/.well-known/nodeinfo"
- "traefik.http.middlewares.nextcloud-nodeinfo.replacepathregex.replacement=/nextcloud/index.php/.well-known/nodeinfo/"
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.permanent=true"
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.regex=https://(.*)/.well-known/webfinger"
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.replacement=https://$${1}/nextcloud/index.php/.well-known/webfinger"
- "traefik.docker.network=core_traefik"
healthcheck:
# Requests /status.php on port 80 with a small PHP script and fails unless
# the response contains "installed":true. "$$" keeps compose from
# interpolating the PHP variables.
test:
- CMD-SHELL
- >-
php -r '$$f=@fsockopen("127.0.0.1",80,$$e,$$s,2); if(!$$f) exit(1);
fwrite($$f,"GET /status.php HTTP/1.0\r\nHost: localhost\r\nConnection: close\r\n\r\n");
$$o=""; while(!feof($$f)){$$o.=fgets($$f,1024);} fclose($$f);
if(strpos($$o,"\"installed\":true")===false) exit(1);'
interval: 30s
timeout: 5s
retries: 6
start_period: 180s
nextcloud-db:
image: mariadb:11.4
restart: always
profiles: ["apps","all","nextcloud"]
container_name: nextcloud-db
hostname: nextcloud_db
command: --transaction-isolation=READ-COMMITTED --log-bin=binlog --binlog-format=ROW
volumes:
- ${PROJECT_ROOT}/apps/nextcloud/database:/var/lib/mysql:rw
environment:
- MYSQL_ROOT_PASSWORD=R1m@dmin
- MYSQL_PASSWORD=R1m@dmin
- MYSQL_DATABASE=nextcloud
- MYSQL_USER=nextcloud
- MARIADB_AUTO_UPGRADE=1
# NOTE(review): the NEXTCLOUD_ADMIN_* variables are set on the database
# container, not on nextcloud-webapp; presumably they have no effect here.
- NEXTCLOUD_ADMIN_USER=admin
- NEXTCLOUD_ADMIN_PASSWORD=R1m@dmin
networks:
- nextcloud
labels:
- "io.portainer.accesscontrol.public"
healthcheck:
test: ["CMD-SHELL", "mariadb-admin ping -u nextcloud --password=R1m@dmin --silent"]
interval: 10s
timeout: 5s
retries: 12
start_period: 60s
nextcloud-redis:
# NOTE(review): the image tag is not pinned.
image: "redis"
profiles: ["apps","all","nextcloud"]
# Password-protected Redis with append-only persistence and a snapshot rule
# of "60 1000".
command: ["redis-server", "--requirepass", "TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n", "--appendonly", "yes", "--save", "60", "1000"]
hostname: redis
container_name: nextcloud-redis
environment:
- REDIS_HOST_PASSWORD=TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n
volumes:
- ${PROJECT_ROOT}/apps/nextcloud/data/redis:/data:rw
restart: always
networks:
- nextcloud
labels:
- "io.portainer.accesscontrol.public"
healthcheck:
test: ["CMD-SHELL", "redis-cli -a TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n PING | grep -q PONG"]
interval: 10s
timeout: 5s
retries: 6
start_period: 10s
networks:
# traefik_reverse_proxy:
# external: true
nextcloud:
# driver: bridge
+18
View File
@@ -0,0 +1,18 @@
# Passbolt CE image adjusted to run as a dedicated unprivileged "passbolt" user.
# NOTE(review): the passbolt compose file in this commit uses the stock
# passbolt/passbolt:latest-ce image, so this Dockerfile appears to be unused.
FROM passbolt/passbolt:latest-ce
# Create the passbolt group and user (home directory /media/data).
RUN groupadd -r passbolt && useradd -m -s /bin/bash -d /media/data -g passbolt passbolt
# Give root a nologin shell.
RUN chsh -s /usr/sbin/nologin root
# Hand the nginx configuration/state directories and /run to the new user.
RUN chown -R passbolt:passbolt /etc/nginx
RUN chown -R passbolt:passbolt /var/lib/nginx
RUN chown -R passbolt:passbolt /run
#COPY nginx.conf /etc/nginx/nginx.conf
#ENV PATH "${PATH}:/opt/doods"
#ENV HOME /media/data
USER passbolt
# ENTRYPOINT ["python3", "main.py"]
# CMD ["api"]
+79
View File
@@ -0,0 +1,79 @@
# Passbolt (password manager) with its MariaDB database.
# Both services start with the "apps", "all" or "passbolt" compose profile.
services:
  passbolt-db:
    profiles: ["apps","all","passbolt"]
    container_name: passbolt-db
    image: mariadb:12
    restart: always
    environment:
      # NOTE(review): the database password is committed in plain text here
      # and again on passbolt-webapp; consider an env file.
      MYSQL_RANDOM_ROOT_PASSWORD: "true"
      MYSQL_DATABASE: "passbolt"
      MYSQL_USER: "passbolt"
      MYSQL_PASSWORD: "P4ssb0lt"
    volumes:
      - ${PROJECT_ROOT}/apps/passbolt/data/database:/var/lib/mysql
    networks:
      - passbolt
    healthcheck:
      # Use the variables this service actually defines (MYSQL_*). The former
      # $$MARIADB_USER / $$MARIADB_PASSWORD are not set in this container's
      # environment, so the check ran with an empty user and password.
      test: ["CMD-SHELL", "mariadb-admin ping -h 127.0.0.1 -u\"$$MYSQL_USER\" -p\"$$MYSQL_PASSWORD\" --silent"]
      interval: 10s
      timeout: 5s
      retries: 12
      start_period: 60s
    labels:
      - "io.portainer.accesscontrol.public"
  passbolt-webapp:
    image: passbolt/passbolt:latest-ce
    profiles: ["apps","all","passbolt"]
    container_name: passbolt-webapp
    #Alternatively you can use rootless:
    restart: always
    depends_on:
      - passbolt-db
    environment:
      APP_FULL_BASE_URL: https://passbolt.lan.ddnsgeek.com
      DATASOURCES_DEFAULT_HOST: "passbolt-db"
      DATASOURCES_DEFAULT_USERNAME: "passbolt"
      DATASOURCES_DEFAULT_PASSWORD: "P4ssb0lt"
      DATASOURCES_DEFAULT_DATABASE: "passbolt"
      PASSBOLT_GPG_SERVER_KEY_FINGERPRINT: "CBBB2B8F3E9FACA114537ACB8965B750F7363586"
    volumes:
      - ${PROJECT_ROOT}/apps/passbolt/data/gpg:/etc/passbolt/gpg
      - ${PROJECT_ROOT}/apps/passbolt/data/jwt:/etc/passbolt/jwt
    # Wait (without timeout) for the database port before starting Passbolt.
    command:
      [
        "/usr/bin/wait-for.sh",
        "-t",
        "0",
        "passbolt-db:3306",
        "--",
        "/docker-entrypoint.sh",
      ]
    networks:
      - traefik
      - passbolt
    labels:
      - "traefik.http.routers.passbolt.rule=Host(`passbolt.lan.ddnsgeek.com`)"
      - "traefik.enable=true"
      - "traefik.http.routers.passbolt.entrypoints=websecure"
      - "traefik.http.routers.passbolt.tls.certresolver=myresolver"
      - "io.portainer.accesscontrol.public"
      - "traefik.docker.network=core_traefik"
    healthcheck:
      # Healthy when /healthcheck/status returns exactly "OK".
      test: ["CMD-SHELL", "curl -fsS http://localhost/healthcheck/status | grep -qx OK"]
      # su -s /bin/sh -c "/usr/share/php/passbolt/bin/cake passbolt healthcheck" www-data
      # | grep -q "No error found"
      interval: 30s
      timeout: 10s
      retries: 6
      start_period: 120s
networks:
  # traefik_reverse_proxy:
  #   external: true
  # internal:
  #   driver: bridge
  passbolt:
+6
View File
@@ -0,0 +1,6 @@
# SearXNG image prepared to run as the unprivileged "searxng" user.
FROM searxng/searxng:latest
# Only the SearXNG configuration directory needs to be writable by that user.
# The previous "chown -R ... /etc" handed every system configuration file
# (passwd, group, ...) to the unprivileged account.
RUN mkdir -p /etc/searxng && chown -R searxng:searxng /etc/searxng
USER searxng:searxng
+32
View File
@@ -0,0 +1,32 @@
# SearXNG metasearch service, published through Traefik as
# searxng.lan.ddnsgeek.com. Started with the "apps", "all" or "searxng" profile.
services:
searxng-webapp:
# NOTE(review): the image tag is not pinned, and the stock image is used
# rather than the Dockerfile kept next to this compose file.
image: searxng/searxng
profiles: ["apps","all","searxng"]
container_name: searxng-webapp
restart: always
# Root filesystem is read-only; only the tmpfs mounts below are writable.
read_only: true
tmpfs:
- /tmp
- /var
- /run
hostname: searxng.lan.ddnsgeek.com
networks:
- traefik
labels:
- "traefik.http.routers.searxng.rule=Host(`searxng.lan.ddnsgeek.com`)"
- "traefik.enable=true"
- "traefik.http.routers.searxng.entrypoints=websecure"
- "traefik.http.routers.searxng.tls.certresolver=myresolver"
- "io.portainer.accesscontrol.public"
# - "traefik.http.routers.searxng.middlewares=crowdsec@file,secHeaders@file,error-pages-middleware"
- "traefik.http.services.searxng.loadbalancer.server.port=8080"
healthcheck:
# Healthy when a GET on port 8080 returns a 2xx/3xx status within 3 seconds.
test: ["CMD-SHELL", "python3 -c \"import urllib.request,sys; r=urllib.request.urlopen('http://127.0.0.1:8080/', timeout=3); sys.exit(0 if 200<=r.status<400 else 1)\""]
interval: 20s
timeout: 5s
retries: 8
start_period: 30s
#networks:
# traefik_reverse_proxy:
# external: true
+178
View File
@@ -0,0 +1,178 @@
#!/bin/sh
# Print the usage text: command line options, the environment variables that
# are honoured and the configuration volume.
# (The stray double quote that used to end the last line of the text is gone.)
help() {
    cat <<EOF
Command line:
-h Display this help
-d Dry run to update the configuration files.
-f Always update on the configuration files (existing files are renamed with
the .old suffix). Without this option, the new configuration files are
copied with the .new suffix
Environment variables:
INSTANCE_NAME settings.yml : general.instance_name
AUTOCOMPLETE settings.yml : search.autocomplete
BASE_URL settings.yml : server.base_url
MORTY_URL settings.yml : result_proxy.url
MORTY_KEY settings.yml : result_proxy.key
BIND_ADDRESS uwsgi bind to the specified TCP socket using HTTP protocol.
Default value: ${DEFAULT_BIND_ADDRESS}
Volume:
/etc/searxng the docker entry point copies settings.yml and uwsgi.ini in
this directory (see the -f command line option)
EOF
}
# Address uwsgi listens on; BIND_ADDRESS from the environment wins over the default.
export DEFAULT_BIND_ADDRESS="0.0.0.0:8080"
export BIND_ADDRESS="${BIND_ADDRESS:-${DEFAULT_BIND_ADDRESS}}"

# Parse command line
#   -f  force replacement of outdated configuration files
#   -d  dry run: update the configuration files, then exit
#   -h  show the help text
FORCE_CONF_UPDATE=0
DRY_RUN=0
while getopts "fdh" option
do
    case $option in
        f) FORCE_CONF_UPDATE=1 ;;
        d) DRY_RUN=1 ;;
        h)
            help
            exit 0
            ;;
        *)
            # Diagnostics belong on stderr; the exit status is unchanged.
            echo "unknown option ${option}" >&2
            exit 42
            ;;
    esac
done
# Print searx.version.VERSION_STRING by running a short Python snippet as the
# "searxng" user; anything written to stderr is discarded.
get_searxng_version(){
su searxng -c \
'python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)"' \
2>/dev/null
}
# Export the version for child processes and show it in the start-up output.
SEARXNG_VERSION="$(get_searxng_version)"
export SEARXNG_VERSION
echo "SearXNG version ${SEARXNG_VERSION}"
# helpers to update the configuration files
# Rewrite the "workers" and "threads" settings of the uwsgi configuration
# file given as $1, in place.
#   UWSGI_WORKERS  worker count; defaults to "%k" (presumably uwsgi's
#                  placeholder for the detected CPU cores - confirm)
#   UWSGI_THREADS  thread count; defaults to 4
patch_uwsgi_settings() {
CONF="$1"
# update uwsgi.ini
sed -i \
-e "s|workers = .*|workers = ${UWSGI_WORKERS:-%k}|g" \
-e "s|threads = .*|threads = ${UWSGI_THREADS:-4}|g" \
"${CONF}"
}
# Patch the settings.yml given as $1, in place, from the environment:
# BASE_URL, INSTANCE_NAME and AUTOCOMPLETE replace their default values and
# the placeholder secret "ultrasecretkey" is replaced by a random one.
# When both MORTY_KEY and MORTY_URL are set, the image proxy is enabled and a
# result_proxy section is appended.
# NOTE(review): the values are inserted into the sed expressions unescaped; a
# "|" in BASE_URL or a "/" in INSTANCE_NAME / AUTOCOMPLETE would break them.
patch_searxng_settings() {
CONF="$1"
# Make sure that there is trailing slash at the end of BASE_URL
# see https://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion
# (an unset or empty BASE_URL therefore becomes "/")
export BASE_URL="${BASE_URL%/}/"
# update settings.yml
sed -i \
-e "s|base_url: false|base_url: ${BASE_URL}|g" \
-e "s/instance_name: \"SearXNG\"/instance_name: \"${INSTANCE_NAME}\"/g" \
-e "s/autocomplete: \"\"/autocomplete: \"${AUTOCOMPLETE}\"/g" \
-e "s/ultrasecretkey/$(openssl rand -hex 32)/g" \
"${CONF}"
# Morty configuration
if [ -n "${MORTY_KEY}" ] && [ -n "${MORTY_URL}" ]; then
sed -i -e "s/image_proxy: false/image_proxy: true/g" \
"${CONF}"
cat >> "${CONF}" <<-EOF
# Morty configuration
result_proxy:
url: ${MORTY_URL}
key: !!binary "${MORTY_KEY}"
EOF
fi
}
# Create or refresh one configuration file from its reference copy.
#   $1  non-zero: replace an outdated file (the old one is kept as <file>.old)
#       zero:     keep the file and write the new version to <file>.new
#   $2  path of the configuration file
#   $3  path of the reference file
#   $4  name of the function used to patch a freshly copied file
update_conf() {
    FORCE_CONF_UPDATE=$1
    CONF="$2"
    NEW_CONF="${2}.new"
    OLD_CONF="${2}.old"
    REF_CONF="$3"
    PATCH_REF_CONF="$4"

    # No configuration yet: start from the reference.
    if [ ! -f "${CONF}" ]; then
        printf 'Create %s\n' "${CONF}"
        cp "${REF_CONF}" "${CONF}"
        $PATCH_REF_CONF "${CONF}"
        return
    fi

    # The reference is not newer than the existing file: nothing to do.
    if ! [ "${REF_CONF}" -nt "${CONF}" ]; then
        printf 'Use existing %s\n' "${CONF}"
        return
    fi

    # There is a new version, but updates are not forced:
    # keep the current configuration and put the new one next to it.
    if ! [ "$FORCE_CONF_UPDATE" -ne 0 ]; then
        printf '⚠️ Check new version %s to make sure SearXNG is working properly\n' "${NEW_CONF}"
        cp "${REF_CONF}" "${NEW_CONF}"
        $PATCH_REF_CONF "${NEW_CONF}"
        return
    fi

    # Forced update: replace the current configuration, saving it once.
    printf '⚠️ Automatically update %s to the new version\n' "${CONF}"
    if [ ! -f "${OLD_CONF}" ]; then
        printf 'The previous configuration is saved to %s\n' "${OLD_CONF}"
        mv "${CONF}" "${OLD_CONF}"
    fi
    cp "${REF_CONF}" "${CONF}"
    $PATCH_REF_CONF "${CONF}"
}
# searx compatibility: copy /etc/searx/* to /etc/searxng/*
# NOTE(review): SEARXNG_SETTINGS_PATH and UWSGI_SETTINGS_PATH are not set in
# this script; presumably the image environment provides them.
SEARX_CONF=0
if [ -f "/etc/searx/settings.yml" ]; then
if [ ! -f "${SEARXNG_SETTINGS_PATH}" ]; then
printf '⚠️ /etc/searx/settings.yml is copied to /etc/searxng\n'
cp "/etc/searx/settings.yml" "${SEARXNG_SETTINGS_PATH}"
fi
SEARX_CONF=1
fi
if [ -f "/etc/searx/uwsgi.ini" ]; then
printf '⚠️ /etc/searx/uwsgi.ini is ignored. Use the volume /etc/searxng\n'
SEARX_CONF=1
fi
# Leave a migration note inside the deprecated volume when it is in use.
if [ "$SEARX_CONF" -eq "1" ]; then
printf '⚠️ The deprecated volume /etc/searx is mounted. Please update your configuration to use /etc/searxng ⚠️\n'
cat << EOF > /etc/searx/deprecated_volume_read_me.txt
This Docker image uses the volume /etc/searxng
Update your configuration:
* remove uwsgi.ini (or very carefully update your existing uwsgi.ini using https://github.com/searxng/searxng/blob/master/dockerfiles/uwsgi.ini )
* mount /etc/searxng instead of /etc/searx
EOF
fi
# end of searx compatibility
# make sure there are uwsgi settings
update_conf "${FORCE_CONF_UPDATE}" "${UWSGI_SETTINGS_PATH}" "/usr/local/searxng/dockerfiles/uwsgi.ini" "patch_uwsgi_settings"
# make sure there are searxng settings
update_conf "${FORCE_CONF_UPDATE}" "${SEARXNG_SETTINGS_PATH}" "/usr/local/searxng/searx/settings.yml" "patch_searxng_settings"
# dry run (to update configuration files, then inspect them)
if [ $DRY_RUN -eq 1 ]; then
printf 'Dry run\n'
exit
fi
# The key has been written to settings.yml by now; keep it out of the
# environment of the server process.
unset MORTY_KEY
# Start uwsgi
# exec replaces this shell, so uwsgi (running as searxng:searxng) becomes the
# container's main process.
printf 'Listen on %s\n' "${BIND_ADDRESS}"
exec su-exec searxng:searxng uwsgi --master --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"
+205
View File
@@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later
import sys, os
from pallets_sphinx_themes import ProjectLink
from searx import get_setting
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
# Project --------------------------------------------------------------
project = 'SearXNG'
copyright = 'SearXNG team'
author = 'SearXNG team'
release, version = VERSION_STRING, VERSION_STRING
# URLs taken from the SearXNG settings; SEARXNG_URL falls back to a
# placeholder when server.base_url is falsy.
SEARXNG_URL = get_setting('server.base_url') or 'https://example.org/searxng'
ISSUE_URL = get_setting('brand.issue_url')
DOCS_URL = get_setting('brand.docs_url')
PUBLIC_INSTANCES = get_setting('brand.public_instances')
PRIVACYPOLICY_URL = get_setting('general.privacypolicy_url')
CONTACT_URL = get_setting('general.contact_url')
WIKI_URL = get_setting('brand.wiki_url')
# hint: sphinx.ext.viewcode won't highlight when 'highlight_language' [1] is set
# to string 'none' [2]
#
# [1] https://www.sphinx-doc.org/en/master/usage/extensions/viewcode.html
# [2] https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-highlight_language
highlight_language = 'default'
# General --------------------------------------------------------------
master_doc = "index"
source_suffix = '.rst'
numfig = True
exclude_patterns = ['build-templates/*.rst', 'user/*.md']
import searx.engines
import searx.plugins
import searx.webutils
# import searx.webapp is needed to init the engines & plugins, to init a
# (empty) secret_key is needed.
searx.settings['server']['secret_key'] = ''
import searx.webapp
searx.engines.load_engines(searx.settings['engines'])
# Data for the Jinja templates of the documentation (presumably consumed by
# the sphinx_jinja extension enabled further down): loaded engines and
# plugins, the minimum Node version from the environment, the number of
# engines that are not disabled, and the engine categories.
jinja_contexts = {
'searx': {
'engines': searx.engines.engines,
'plugins': searx.plugins.plugins,
'version': {
'node': os.getenv('NODE_MINIMUM_VERSION')
},
'enabled_engine_count': sum(not x.disabled for x in searx.engines.engines.values()),
'categories': searx.engines.categories,
'categories_as_tabs': {c: searx.engines.categories[c] for c in searx.settings['categories_as_tabs']},
},
}
# Extra Jinja filters available to those templates.
jinja_filters = {
'group_engines_in_tab': searx.webutils.group_engines_in_tab,
}
# Let the Jinja template in configured_engines.rst access documented_modules
# to automatically link documentation for modules if it exists.
def setup(app):
ENGINES_DOCNAME = 'user/configured_engines'
def before_read_docs(app, env, docnames):
assert ENGINES_DOCNAME in docnames
docnames.remove(ENGINES_DOCNAME)
docnames.append(ENGINES_DOCNAME)
# configured_engines must come last so that sphinx already has
# discovered the python module documentations
def source_read(app, docname, source):
if docname == ENGINES_DOCNAME:
jinja_contexts['searx']['documented_modules'] = app.env.domains['py'].modules
app.connect('env-before-read-docs', before_read_docs)
app.connect('source-read', source_read)
# usage:: lorem :patch:`f373169` ipsum
# Each entry maps a role name to a (URL template, caption template) pair for
# sphinx.ext.extlinks.
extlinks = {}
# upstream links
extlinks['wiki'] = ('https://github.com/searxng/searxng/wiki/%s', ' %s')
extlinks['pull'] = ('https://github.com/searxng/searxng/pull/%s', 'PR %s')
extlinks['pull-searx'] = ('https://github.com/searx/searx/pull/%s', 'PR %s')
# links to custom brand
extlinks['origin'] = (GIT_URL + '/blob/' + GIT_BRANCH + '/%s', 'git://%s')
extlinks['patch'] = (GIT_URL + '/commit/%s', '#%s')
extlinks['docs'] = (DOCS_URL + '/%s', 'docs: %s')
extlinks['pypi'] = ('https://pypi.org/project/%s', 'PyPi: %s')
extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '%s')
#extlinks['role'] = (
# 'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '')
extlinks['duref'] = (
'https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#%s', '%s')
extlinks['durole'] = (
'https://docutils.sourceforge.io/docs/ref/rst/roles.html#%s', '%s')
extlinks['dudir'] = (
'https://docutils.sourceforge.io/docs/ref/rst/directives.html#%s', '%s')
extlinks['ctan'] = (
'https://ctan.org/pkg/%s', 'CTAN: %s')
# Sphinx extensions enabled for the documentation build.
extensions = [
'sphinx.ext.imgmath',
'sphinx.ext.extlinks',
'sphinx.ext.viewcode',
"sphinx.ext.autodoc",
"sphinx.ext.intersphinx",
"pallets_sphinx_themes",
"sphinx_issues", # https://github.com/sloria/sphinx-issues/blob/master/README.rst
"sphinx_jinja", # https://github.com/tardyp/sphinx-jinja
"sphinxcontrib.programoutput", # https://github.com/NextThought/sphinxcontrib-programoutput
'linuxdoc.kernel_include', # Implementation of the 'kernel-include' reST-directive.
'linuxdoc.rstFlatTable', # Implementation of the 'flat-table' reST-directive.
'linuxdoc.kfigure', # Sphinx extension which implements scalable image handling.
"sphinx_tabs.tabs", # https://github.com/djungelorm/sphinx-tabs
'myst_parser', # https://www.sphinx-doc.org/en/master/usage/markdown.html
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
]
autodoc_default_options = {
'member-order': 'groupwise',
}
myst_enable_extensions = [
"replacements", "smartquotes"
]
suppress_warnings = ['myst.domains']
# External documentation sets that can be cross-referenced via intersphinx.
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"babel" : ("https://babel.readthedocs.io/en/latest/", None),
"flask": ("https://flask.palletsprojects.com/", None),
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
"jinja": ("https://jinja.palletsprojects.com/", None),
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
"redis": ('https://redis.readthedocs.io/en/stable/', None),
}
issues_github_path = "searxng/searxng"
# HTML -----------------------------------------------------------------
# https://searxng.github.io/searxng --> '/searxng/'
# https://docs.searxng.org --> '/'
notfound_urls_prefix = '/'
# Make the local theme directory and ../utils importable.
sys.path.append(os.path.abspath('_themes'))
sys.path.insert(0, os.path.abspath("../utils/"))
html_theme_path = ['_themes']
html_theme = "searxng"
# sphinx.ext.imgmath setup
html_math_renderer = 'imgmath'
imgmath_image_format = 'svg'
imgmath_font_size = 14
# sphinx.ext.imgmath setup END
html_show_sphinx = False
html_theme_options = {"index_sidebar_logo": True}
# Project links shown by the theme: "Source" is always present, every other
# link only when its URL is configured.
html_context = {"project_links": [ProjectLink("Source", GIT_URL + '/tree/' + GIT_BRANCH)]}
html_context["project_links"] += [
    ProjectLink(label, url)
    for label, url in (
        ("Wiki", WIKI_URL),
        ("Public instances", PUBLIC_INSTANCES),
        ("Issue Tracker", ISSUE_URL),
        ("Privacy Policy", PRIVACYPOLICY_URL),
        ("Contact", CONTACT_URL),
    )
    if url
]
# Sidebar templates used on every page ("**" matches all documents).
html_sidebars = {
"**": [
"globaltoc.html",
"project.html",
"relations.html",
"searchbox.html",
"sourcelink.html"
],
}
singlehtml_sidebars = {"index": ["project.html", "localtoc.html"]}
html_logo = "../src/brand/searxng-wordmark.svg"
html_title = "SearXNG Documentation ({})".format(VERSION_STRING)
html_show_sourcelink = True
# LaTeX ----------------------------------------------------------------
# One manual built from the master document; the file name carries the version.
latex_documents = [
(master_doc, "searxng-{}.tex".format(VERSION_STRING), html_title, author, "manual")
]
+1
View File
@@ -0,0 +1 @@
*.md
+25
View File
@@ -0,0 +1,25 @@
# Search categories this engine template is listed under.
categories = ['general'] # optional
def request(query, params):
    '''Pre-request callback: set the URL of the outgoing request.

    ``params`` is a dict with these keys::

        method  : POST/GET
        headers : {}
        data    : {} # if method == POST
        url     : ''
        category: 'search category'
        pageno  : 1 # number of the requested page

    The same dict is returned with ``url`` filled in for ``query``.
    '''
    target = 'https://host/%s' % query
    params['url'] = target
    return params
def response(resp):
    '''Post-response callback: turn ``resp`` into a list of results.

    ``resp`` is the requests response object.  This template ignores it and
    returns one placeholder result with empty ``url``, ``title`` and
    ``content`` fields.
    '''
    placeholder = dict.fromkeys(('url', 'title', 'content'), '')
    return [placeholder]
+106
View File
@@ -0,0 +1,106 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring
import sys
import os
from os.path import dirname, abspath
import logging
import searx.unixthreadname
import searx.settings_loader
from searx.settings_defaults import settings_set_defaults
# Debug
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
# Production
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
LOG_LEVEL_PROD = logging.WARNING
# Directory of this package and its parent directory.
searx_dir = abspath(dirname(__file__))
searx_parent_dir = abspath(dirname(dirname(__file__)))
# Settings are loaded once, at import time; defaults are applied only when
# loading produced a settings object.
settings, settings_load_message = searx.settings_loader.load_settings()
if settings is not None:
settings = settings_set_defaults(settings)
# Sentinel telling "no default given" apart from an explicit ``None``.
_unset = object()


def get_setting(name, default=_unset):
    """Look up the dotted path ``name`` (e.g. ``server.base_url``) in the
    settings and return the value found there.

    When a component of the path is missing, or an intermediate value is not
    a dict, ``default`` is returned; without a ``default`` a
    :py:obj:`KeyError` carrying ``name`` is raised.
    """
    node = settings
    for key in name.split('.'):
        node = node.get(key, _unset) if isinstance(node, dict) else _unset
        if node is not _unset:
            continue
        if default is _unset:
            raise KeyError(name)
        return default
    return node
def is_color_terminal():
    """Return whether log output may be colored: ``TERM`` is neither
    ``dumb`` nor ``unknown`` and stdout is a TTY."""
    term_supports_color = os.getenv('TERM') not in ('dumb', 'unknown')
    return term_supports_color and sys.stdout.isatty()
def logging_config_debug():
    """Configure logging for debug mode.

    The level comes from ``SEARXNG_DEBUG_LOG_LEVEL`` (default ``DEBUG``).
    When the optional ``coloredlogs`` package is importable and the terminal
    supports colors, colored output is installed; otherwise plain
    :py:func:`logging.basicConfig` is used.  Both use ``LOG_FORMAT_DEBUG``.
    """
    try:
        import coloredlogs  # pylint: disable=import-outside-toplevel
    except ImportError:
        coloredlogs = None

    log_level = os.environ.get('SEARXNG_DEBUG_LOG_LEVEL', 'DEBUG')

    if not (coloredlogs and is_color_terminal()):
        # Plain output: translate the level name into its numeric value.
        logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
        return

    level_styles = {
        'spam': {'color': 'green', 'faint': True},
        'debug': {},
        'notice': {'color': 'magenta'},
        'success': {'bold': True, 'color': 'green'},
        'info': {'bold': True, 'color': 'cyan'},
        'warning': {'color': 'yellow'},
        'error': {'color': 'red'},
        'critical': {'bold': True, 'color': 'red'},
    }
    field_styles = {
        'asctime': {'color': 'green'},
        'hostname': {'color': 'magenta'},
        'levelname': {'color': 8},
        'name': {'color': 8},
        'programname': {'color': 'cyan'},
        'username': {'color': 'yellow'},
    }
    coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG)
# Configure logging at import time, depending on general.debug.
# NOTE(review): the settings-loading code above allows ``settings`` to be
# None; this subscript would then raise TypeError.
searx_debug = settings['general']['debug']
if searx_debug:
logging_config_debug()
else:
# Production: WARNING and above, also for the root and werkzeug loggers.
logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD)
logging.root.setLevel(level=LOG_LEVEL_PROD)
logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD)
logger = logging.getLogger('searx')
logger.info(settings_load_message)
# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
+46
View File
@@ -0,0 +1,46 @@
from os import listdir
from os.path import realpath, dirname, join, isdir
from searx.utils import load_module
from collections import defaultdict
answerers_dir = dirname(realpath(__file__))
def load_answerers():
    """Load ``answerer.py`` from every sub-directory of the answerers package.

    Entries that are not directories, or whose name starts with ``_``, are
    skipped.  A loaded module must define ``keywords`` as a non-empty tuple;
    otherwise the process exits with status 2.

    :returns: list of the loaded answerer modules.
    """
    loaded = []
    for entry in listdir(answerers_dir):
        entry_path = join(answerers_dir, entry)
        if entry.startswith('_') or not isdir(entry_path):
            continue
        module = load_module('answerer.py', entry_path)
        keywords = getattr(module, 'keywords', None)
        if not isinstance(keywords, tuple) or not keywords:
            exit(2)
        loaded.append(module)
    return loaded
def get_answerers_by_keywords(answerers):
    """Index the ``answer`` callables of ``answerers`` by trigger keyword.

    :param answerers: iterable of answerer modules, each providing a
        ``keywords`` iterable and an ``answer`` callable.
    :returns: a :py:class:`collections.defaultdict` mapping each keyword to
        the list of ``answer`` callables registered for it.
    """
    by_keyword = defaultdict(list)
    for answerer in answerers:
        # One pass over the keywords.  The former duplicated nested loop
        # registered the callable len(keywords) times per keyword, so an
        # answerer with several keywords answered the same query repeatedly.
        for keyword in answerer.keywords:
            by_keyword[keyword].append(answerer.answer)
    return by_keyword
def ask(query):
    """Collect the answers for ``query`` from the answerers registered for
    its first word.

    :param query: object whose ``query`` attribute holds the query string.
    :returns: list of the truthy results returned by the matching answerers,
        in registration order; empty when the query is blank or its first
        word is not a known keyword.
    """
    words = [word for word in query.query.split() if word]
    if not words or words[0] not in answerers_by_keywords:
        return []
    replies = (answerer(query) for answerer in answerers_by_keywords[words[0]])
    return [reply for reply in replies if reply]
# Load all answerers and build the keyword index once, at import time.
answerers = load_answerers()
answerers_by_keywords = get_answerers_by_keywords(answerers)
+70
View File
@@ -0,0 +1,70 @@
import hashlib
import random
import string
import uuid
from flask_babel import gettext
# required answerer attribute
# specifies which search query keywords triggers this answerer
keywords = ('random',)
# Random integers are drawn from [-random_int_max, random_int_max].
random_int_max = 2**31
# Alphabet for random strings: lowercase letters, digits, uppercase letters.
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase


def random_characters():
    """Return a list of 8 to 32 characters drawn from the alphabet."""
    count = random.randint(8, 32)
    return [random.choice(random_string_letters) for _ in range(count)]


def random_string():
    """Return a random alphanumeric string of 8 to 32 characters."""
    characters = random_characters()
    return ''.join(characters)


def random_float():
    """Return a random float from [0.0, 1.0) as a string."""
    value = random.random()
    return str(value)


def random_int():
    """Return a random integer from the supported range as a string."""
    value = random.randint(-random_int_max, random_int_max)
    return str(value)


def random_sha256():
    """Return the SHA-256 hex digest of a random string."""
    digest = hashlib.sha256(random_string().encode())
    return str(digest.hexdigest())


def random_uuid():
    """Return a random (version 4) UUID as a string."""
    value = uuid.uuid4()
    return str(value)


# Generator for each value type accepted after the ``random`` keyword.
random_types = {
    'string': random_string,
    'int': random_int,
    'float': random_float,
    'sha256': random_sha256,
    'uuid': random_uuid,
}
# required answerer function
# can return a list of results (any result type) for a given query
def answer(query):
    """Answer ``random <type>`` queries with one freshly generated value.

    :param query: object whose ``query`` attribute holds the query string.
    :returns: ``[{'answer': value}]`` when the query has exactly two words
        and the second names a known type, otherwise ``[]``.
    """
    words = query.query.split()
    if len(words) == 2 and words[1] in random_types:
        generate = random_types[words[1]]
        return [{'answer': generate()}]
    return []
# required answerer function
# returns information about the answerer
def self_info():
    """Describe this answerer: translated name and description, plus one
    example query per supported random type."""
    example_queries = list(map('random {}'.format, random_types))
    info = {
        'name': gettext('Random value generator'),
        'description': gettext('Generate different random values'),
        'examples': example_queries,
    }
    return info
+50
View File
@@ -0,0 +1,50 @@
from functools import reduce
from operator import mul
from flask_babel import gettext
keywords = ('min', 'max', 'avg', 'sum', 'prod')
# required answerer function
# can return a list of results (any result type) for a given query
def answer(query):
    """Answer ``<function> <number> ...`` queries.

    The first word selects the function (``min``, ``max``, ``avg``, ``sum``
    or ``prod``); every following word must parse as a float.

    :param query: object whose ``query`` attribute holds the query string.
    :returns: ``[{'answer': str(result)}]``, or ``[]`` when there are fewer
        than two words, an argument is not a number, or the function is
        unknown.
    """
    parts = query.query.split()
    if len(parts) < 2:
        return []

    try:
        args = [float(part) for part in parts[1:]]
    except ValueError:
        # Only a failed number conversion means "not for this answerer";
        # the former bare ``except`` also swallowed unrelated errors.
        return []

    func = parts[0]
    # ``result`` rather than ``answer``: do not shadow this function's name.
    result = None
    if func == 'min':
        result = min(args)
    elif func == 'max':
        result = max(args)
    elif func == 'avg':
        result = sum(args) / len(args)
    elif func == 'sum':
        result = sum(args)
    elif func == 'prod':
        result = reduce(mul, args, 1)

    if result is None:
        return []
    return [{'answer': str(result)}]
# required answerer function
# returns information about the answerer
def self_info():
    """Describe this answerer: translated name, a description listing the
    supported functions, and an example query."""
    supported = '/'.join(keywords)
    description = gettext('Compute {functions} of the arguments').format(functions=supported)
    info = {
        'name': gettext('Statistics functions'),
        'description': description,
        'examples': ['avg 123 548 2.04 24.2'],
    }
    return info
+228
View File
@@ -0,0 +1,228 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements functions needed for the autocompleter.
"""
# pylint: disable=use-dict-literal
import json
from urllib.parse import urlencode
import lxml
from httpx import HTTPError
from searx import settings
from searx.engines import (
engines,
google,
)
from searx.network import get as http_get
from searx.exceptions import SearxEngineResponseException
def get(*args, **kwargs):
    """HTTP GET for the autocompleters.

    Forwards to the network layer's ``get`` with ``raise_for_httperror``
    forced to ``True``; unless the caller passes ``timeout``, the configured
    ``outgoing.request_timeout`` is used.
    """
    kwargs['raise_for_httperror'] = True
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    return http_get(*args, **kwargs)
def brave(query, _lang):
    """Autocomplete from Brave search.

    The language argument is ignored; the request always carries the cookie
    ``country=all``.  Returns the items of the second element of the JSON
    response, or ``[]`` when the response is not OK.
    """
    url = 'https://search.brave.com/api/suggest?' + urlencode({'q': query})
    # Per-language country selection is not implemented:
    # if lang in _brave:
    #    country = lang
    country = 'all'
    resp = get(url, cookies={'country': country})
    if not resp.ok:
        return []
    return list(resp.json()[1])
def dbpedia(query, _lang):
    """Autocomplete from DBpedia's keyword lookup (the language argument is not used)."""
    params = urlencode(dict(QueryString=query))
    response = get('https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' + params)

    if not response.ok:
        return []
    # the response is XML, the suggestions are the text of the <Label> nodes
    dom = lxml.etree.fromstring(response.content)
    return dom.xpath('//Result/Label//text()')
def duckduckgo(query, sxng_locale):
    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""
    traits = engines['duckduckgo'].traits
    region = traits.get_region(sxng_locale, traits.all_locale)
    resp = get('https://duckduckgo.com/ac/?type=list&' + urlencode({'q': query, 'kl': region}))

    if resp.ok:
        payload = resp.json()
        # the suggestions are the second element of the JSON list
        if len(payload) > 1:
            return payload[1]
    return []
def google_complete(query, sxng_locale):
    """Autocomplete from Google.  Google's languages and subdomains are
    supported (:py:obj:`searx.engines.google.get_google_info`), the async REST
    API is used::

        https://{subdomain}/complete/search?{args}

    """
    google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
    params = {
        'q': query,
        'client': 'gws-wiz',
        'hl': google_info['params']['hl'],
    }
    url = 'https://{subdomain}/complete/search?{args}'.format(
        subdomain=google_info['subdomain'],
        args=urlencode(params),
    )

    resp = get(url)
    if not resp.ok:
        return []

    # cut the JSON array out of the response text
    text = resp.text
    first = text.find('[')
    last = text.find(']', -3) + 1
    data = json.loads(text[first:last])
    # each suggestion is HTML markup, only its text content is returned
    return [lxml.html.fromstring(item[0]).text_content() for item in data[0]]
def seznam(query, _lang):
    """Autocomplete from Seznam (the language argument is not used)."""
    params = {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
    resp = get('https://suggest.seznam.cz/fulltext/cs?' + urlencode(params))
    if not resp.ok:
        return []

    suggestions = []
    for item in resp.json().get('result', []):
        if item.get('itemType', None) != 'ItemType.TEXT':
            continue
        # a suggestion is split into parts, join their text
        suggestions.append(''.join(part.get('text', '') for part in item.get('text', [])))
    return suggestions
def startpage(query, sxng_locale):
    """Autocomplete from Startpage. Supports Startpage's languages"""
    lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
    params = urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})
    resp = get('https://startpage.com/suggestions?' + params)

    suggestions = resp.json().get('suggestions', [])
    return [entry['text'] for entry in suggestions if 'text' in entry]
def swisscows(query, _lang):
    """Autocomplete from Swisscows (the language argument is not used).

    The decoded JSON response is returned as it is.
    """
    params = urlencode({'query': query})
    resp = get('https://swisscows.ch/api/suggest?' + params + '&itemsCount=5')
    return json.loads(resp.text)
def qwant(query, sxng_locale):
    """Autocomplete from Qwant. Supports Qwant's regions."""
    locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
    params = urlencode({'q': query, 'locale': locale, 'version': '2'})
    resp = get('https://api.qwant.com/v3/suggest?' + params)

    if not resp.ok:
        return []
    data = resp.json()
    if data['status'] != 'success':
        return []
    return [item['value'] for item in data['data']['items']]
def wikipedia(query, sxng_locale):
    """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
    eng_traits = engines['wikipedia'].traits
    wiki_lang = eng_traits.get_language(sxng_locale, 'en')
    wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')

    params = {
        'action': 'opensearch',
        'format': 'json',
        'formatversion': '2',
        'search': query,
        'namespace': '0',
        'limit': '10',
    }
    url = 'https://{wiki_netloc}/w/api.php?{args}'.format(args=urlencode(params), wiki_netloc=wiki_netloc)

    resp = get(url)
    if resp.ok:
        data = resp.json()
        # the suggestions are the second element of the JSON list
        if len(data) > 1:
            return data[1]
    return []
def yandex(query, _lang):
    """Autocomplete from Yandex (the language argument is not used)."""
    params = urlencode(dict(part=query))
    resp = get("https://suggest.yandex.com/suggest-ff.cgi?{0}".format(params))
    suggestions = json.loads(resp.text)
    # the suggestions are the second element of the JSON list
    return suggestions[1] if len(suggestions) > 1 else []
# Maps the name of an autocomplete backend to the function implementing it;
# ``search_autocomplete`` looks the backend up here and calls it with
# ``(query, sxng_locale)``.
backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
    'google': google_complete,
    'seznam': seznam,
    'startpage': startpage,
    'swisscows': swisscows,
    'qwant': qwant,
    'wikipedia': wikipedia,
    'brave': brave,
    'yandex': yandex,
}
def search_autocomplete(backend_name, query, sxng_locale):
    """Return the suggestions of the autocomplete backend ``backend_name``.

    An empty list is returned if the backend is unknown or if the request to
    the backend fails with a HTTP / engine response error.
    """
    if backend_name not in backends:
        return []
    try:
        return backends[backend_name](query, sxng_locale)
    except (HTTPError, SearxEngineResponseException):
        return []
+51
View File
@@ -0,0 +1,51 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements the :origin:`searxng_msg <babel.cfg>` extractor to
extract messages from:
- :origin:`searx/searxng.msg`
The ``searxng.msg`` files are selected by Babel_, see Babel's configuration in
:origin:`babel.cfg`::
searxng_msg = searx.babel_extract.extract
...
[searxng_msg: **/searxng.msg]
A ``searxng.msg`` file is a python file that is *executed* by the
:py:obj:`extract` function. Additional ``searxng.msg`` files can be added by:
1. Adding a ``searxng.msg`` file in one of the SearXNG python packages and
2. implement a method in :py:obj:`extract` that yields messages from this file.
.. _Babel: https://babel.pocoo.org/en/latest/index.html
"""
from os import path
SEARXNG_MSG_FILE = "searxng.msg"
_MSG_FILES = [path.join(path.dirname(__file__), SEARXNG_MSG_FILE)]


def extract(
    # pylint: disable=unused-argument
    fileobj,
    keywords,
    comment_tags,
    options,
):
    """Custom Babel extractor_ for the ``searxng.msg`` files.

    The file is executed and for each dictionary named in its ``__all__`` one
    message per item is yielded; the comment of a message is the python
    expression that addresses the item.  Only files listed in ``_MSG_FILES``
    are accepted, for any other file a :py:obj:`RuntimeError` is raised.

    .. _extractor:
       https://babel.pocoo.org/en/latest/messages.html#writing-extraction-methods
    """
    file_name = fileobj.name
    if file_name not in _MSG_FILES:
        raise RuntimeError("don't know how to extract messages from %s" % file_name)

    # the msg file is python code, run it and collect what it defines
    msg_namespace = {}
    exec(fileobj.read(), {}, msg_namespace)  # pylint: disable=exec-used

    for catalog_name in msg_namespace['__all__']:
        catalog = msg_namespace[catalog_name]
        for key, message in catalog.items():
            yield 0, '_', message, ["%s['%s']" % (catalog_name, key)]
+27
View File
@@ -0,0 +1,27 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection src:
X-Forwarded-For
===============
.. attention::
A correct setup of the HTTP request headers ``X-Forwarded-For`` and
``X-Real-IP`` is essential to be able to assign a request to an IP correctly:
- `NGINX RequestHeader`_
- `Apache RequestHeader`_
.. _NGINX RequestHeader:
https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site
.. _Apache RequestHeader:
https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site
.. autofunction:: searx.botdetection.get_real_ip
"""
from ._helpers import dump_request
from ._helpers import get_real_ip
from ._helpers import too_many_requests
+120
View File
@@ -0,0 +1,120 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
IPv4Address,
IPv6Address,
ip_network,
)
import flask
import werkzeug
from searx.tools import config
from searx import logger
logger = logger.getChild('botdetection')
def dump_request(request: flask.Request):
    """Return a one-line summary of the request (path, form and selected HTTP
    headers), the fields are separated by ``||``."""
    headers = request.headers
    fields = [
        ('X-Forwarded-For', headers.get('X-Forwarded-For')),
        ('X-Real-IP', headers.get('X-Real-IP')),
        ('form', request.form),
        ('Accept', headers.get('Accept')),
        ('Accept-Language', headers.get('Accept-Language')),
        ('Accept-Encoding', headers.get('Accept-Encoding')),
        ('Content-Type', headers.get('Content-Type')),
        ('Content-Length', headers.get('Content-Length')),
        ('Connection', headers.get('Connection')),
        ('User-Agent', headers.get('User-Agent')),
    ]
    return request.path + ''.join(" || %s: %s" % field for field in fields)
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
    """Return the default ``Too Many Requests`` (HTTP 429) response.

    A ``BLOCK`` message with the network and ``log_msg`` is written at DEBUG
    level to the 'botdetection' logger.  The filter methods use this function
    to build their response.
    """
    logger.debug("BLOCK %s: %s", network.compressed, log_msg)
    response = flask.make_response(('Too Many Requests', 429))
    return response
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
    """Return the (client) network the ``real_ip`` belongs to.

    The prefix length is read from ``real_ip.ipv6_prefix`` or
    ``real_ip.ipv4_prefix`` of the configuration, depending on the IP version.
    """
    prefix_key = 'real_ip.ipv6_prefix' if real_ip.version == 6 else 'real_ip.ipv4_prefix'
    # strict=False: the host bits of real_ip are masked out
    return ip_network(f"{real_ip}/{cfg[prefix_key]}", strict=False)
def get_real_ip(request: flask.Request) -> str:
    """Return the real IP of the request.  Not all proxies set all the HTTP
    headers and incoming headers can be faked, therefore the IP can't always be
    determined correctly.

    .. sidebar:: :py:obj:`flask.Request.remote_addr`

       SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).

    The candidates are compared with each other, missing headers and
    inconsistencies are logged.  The IP is taken from (first match):

    - X-Forwarded-For_ header (the entry selected by ``real_ip.x_for``)
    - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
    - :py:obj:`flask.Request.remote_addr`

    If none of them is available ``0.0.0.0`` is returned.

    .. _ProxyFix:
       https://werkzeug.palletsprojects.com/middleware/proxy_fix/

    .. _X-Forwarded-For:
      https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

    """
    headers = request.headers
    forwarded_for = headers.get("X-Forwarded-For")
    real_ip = headers.get('X-Real-IP')
    remote_addr = request.remote_addr

    if forwarded_for:
        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import

        # pick the entry x_for positions from the end (or the first one, if
        # the list is shorter)
        hops = [hop.strip() for hop in forwarded_for.split(',')]
        x_for: int = get_cfg()['real_ip.x_for']  # type: ignore
        forwarded_for = hops[-min(len(hops), x_for)]
    else:
        logger.error("X-Forwarded-For header is not set!")

    if not real_ip:
        logger.error("X-Real-IP header is not set!")

    # (first IP, second IP, warning) -- warn if both are known but differ
    consistency_checks = (
        (real_ip, forwarded_for, "IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)"),
        (remote_addr, forwarded_for, "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)"),
        (remote_addr, real_ip, "IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)"),
    )
    for first, second, warning in consistency_checks:
        if first and second and first != second:
            logger.warning(warning, first, second)

    return forwarded_for or real_ip or remote_addr or '0.0.0.0'
+39
View File
@@ -0,0 +1,39 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_accept``
----------------------
The ``http_accept`` method evaluates a request as the request of a bot if the
Accept_ header ..
- did not contain ``text/html``
.. _Accept:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx.tools import config
from ._helpers import too_many_requests
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Return a ``Too Many Requests`` response if ``text/html`` is not in the
    accepted mimetypes of the request, otherwise ``None``."""
    if 'text/html' in request.accept_mimetypes:
        return None
    return too_many_requests(network, "HTTP header Accept did not contain text/html")
+41
View File
@@ -0,0 +1,41 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_accept_encoding``
-------------------------------
The ``http_accept_encoding`` method evaluates a request as the request of a
bot if the Accept-Encoding_ header ..
- did not contain ``gzip`` AND ``deflate`` (if both values are missed)
- did not contain ``text/html``
.. _Accept-Encoding:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx.tools import config
from ._helpers import too_many_requests
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Return a ``Too Many Requests`` response if the Accept-Encoding header
    lists neither ``gzip`` nor ``deflate``, otherwise ``None``."""
    header = request.headers.get('Accept-Encoding', '')
    encodings = {encoding.strip() for encoding in header.split(',')}
    if encodings & {'gzip', 'deflate'}:
        return None
    return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
+35
View File
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_accept_language``
-------------------------------
The ``http_accept_language`` method evaluates a request as the request of a bot
if the Accept-Language_ header is unset.
.. _Accept-Language:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx.tools import config
from ._helpers import too_many_requests
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Return a ``Too Many Requests`` response if the Accept-Language header
    is missing or blank, otherwise ``None``."""
    accept_language = request.headers.get('Accept-Language', '')
    if accept_language.strip():
        return None
    return too_many_requests(network, "missing HTTP header Accept-Language")
+37
View File
@@ -0,0 +1,37 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_connection``
--------------------------
The ``http_connection`` method evaluates a request as the request of a bot if
the Connection_ header is set to ``close``.
.. _Connection:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx.tools import config
from ._helpers import too_many_requests
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Return a ``Too Many Requests`` response if the Connection header is
    ``close``, otherwise ``None``."""
    connection = request.headers.get('Connection', '').strip()
    if connection != 'close':
        return None
    return too_many_requests(network, "HTTP header 'Connection=close")
+67
View File
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``http_user_agent``
--------------------------
The ``http_user_agent`` method evaluates a request as the request of a bot if
the User-Agent_ header is unset or matches the regular expression
:py:obj:`USER_AGENT`.
.. _User-Agent:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
"""
# pylint: disable=unused-argument
from __future__ import annotations
import re
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx.tools import config
from ._helpers import too_many_requests
# NOTE: ``filter_request`` applies the compiled pattern with ``match()``, so
# the alternatives are tested at the start of the User-Agent value only;
# presumably this is why the PetalBot entry is wrapped in ``.*``.
USER_AGENT = (
    r'('
    + r'unknown'
    + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
    + r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
    + r'|ZmEu|BLEXBot|bitlybot'
    # unmaintained Farside instances
    + r'|'
    + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
    # other bots and client to block
    + '|.*PetalBot.*'
    + r')'
)
"""Regular expression that matches to User-Agent_ from known *bots*"""
_regexp = None


def regexp_user_agent():
    """Return the compiled :py:obj:`USER_AGENT` pattern.

    The pattern is compiled on the first call and kept in the module-level
    ``_regexp`` for all following calls.
    """
    global _regexp  # pylint: disable=global-statement
    if _regexp is None:
        _regexp = re.compile(USER_AGENT)
    return _regexp
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Return a ``Too Many Requests`` response if the User-Agent header
    matches :py:obj:`USER_AGENT`, otherwise ``None``.  A missing header is
    handled like the value ``unknown``."""
    user_agent = request.headers.get('User-Agent', 'unknown')
    if regexp_user_agent().match(user_agent) is None:
        return None
    return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
+148
View File
@@ -0,0 +1,148 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection.ip_limit:
Method ``ip_limit``
-------------------
The ``ip_limit`` method counts request from an IP in *sliding windows*. If
there are too many requests in a sliding window, the request is evaluated as a
bot request. This method requires a redis DB and needs a HTTP X-Forwarded-For_
header. To take privacy only the hash value of an IP is stored in the redis DB
and at least for a maximum of 10 minutes.
The :py:obj:`.link_token` method can be used to investigate whether a request is
*suspicious*. To activate the :py:obj:`.link_token` method in the
:py:obj:`.ip_limit` method add the following to your
``/etc/searxng/limiter.toml``:
.. code:: toml
[botdetection.ip_limit]
link_token = true
If the :py:obj:`.link_token` method is activated and a request is *suspicious*
the request rates are reduced:
- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
To intercept bots that get their IPs from a range of IPs, there is a
:py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
for a longer time. IPs stored in this sliding window have a maximum of
:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
makes a request that is not suspicious, the sliding window for this IP is
dropped.
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
"""
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx.tools import config
from searx import redisdb
from searx.redislib import incr_sliding_window, drop_counter
from . import link_token
from ._helpers import (
too_many_requests,
logger,
)
logger = logger.getChild('ip_limit')

# Windows are given in seconds, the *_MAX values are the number of requests
# ``filter_request`` tolerates per network in the corresponding window.
BURST_WINDOW = 20
"""Time (sec) before sliding window for *burst* requests expires."""

BURST_MAX = 15
"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""

BURST_MAX_SUSPICIOUS = 2
"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""

LONG_WINDOW = 600
"""Time (sec) before the longer sliding window expires."""

LONG_MAX = 150
"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""

LONG_MAX_SUSPICIOUS = 10
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""

# NOTE(review): "WONDOW" looks like a misspelling of "WINDOW"; the same
# spelling is part of the redis key built in ``filter_request``.
API_WONDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""

API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""

SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
"""Time (sec) before sliding window for one suspicious IP expires."""

SUSPICIOUS_IP_MAX = 3
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Count the request of the network in the sliding windows and return a
    response if one of the limits is exceeded, otherwise ``None``.

    - link-local networks are not counted unless
      ``botdetection.ip_limit.filter_link_local`` is set
    - requests with a ``format`` other than ``html`` are counted in the
      :py:obj:`API_WONDOW`
    - with ``botdetection.ip_limit.link_token`` only suspicious requests are
      counted (with the reduced ``*_SUSPICIOUS`` limits), without it all
      requests are counted in the burst and in the long window
    """
    # pylint: disable=too-many-return-statements
    redis_client = redisdb.client()

    def count(key_prefix, window):
        # increment the counter of this network and return its new value
        return incr_sliding_window(redis_client, key_prefix + network.compressed, window)

    if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
        logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
        return None

    if request.args.get('format', 'html') != 'html':
        if count('ip_limit.API_WONDOW:', API_WONDOW) > API_MAX:
            return too_many_requests(network, "too many request in API_WINDOW")

    if not cfg['botdetection.ip_limit.link_token']:
        # vanilla limiter without extensions counts BURST_MAX and LONG_MAX
        if count('ip_limit.BURST_WINDOW', BURST_WINDOW) > BURST_MAX:
            return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")
        if count('ip_limit.LONG_WINDOW', LONG_WINDOW) > LONG_MAX:
            return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")
        return None

    if not link_token.is_suspicious(network, request, True):
        # this IP is no longer suspicious: release ip again / delete the counter of this IP
        drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
        return None

    # this IP is suspicious: count requests from this IP
    if count('ip_limit.SUSPICIOUS_IP_WINDOW', SUSPICIOUS_IP_WINDOW) > SUSPICIOUS_IP_MAX:
        logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
        return flask.redirect(flask.url_for('index'), code=302)

    if count('ip_limit.BURST_WINDOW', BURST_WINDOW) > BURST_MAX_SUSPICIOUS:
        return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")

    if count('ip_limit.LONG_WINDOW', LONG_WINDOW) > LONG_MAX_SUSPICIOUS:
        return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")

    return None
+85
View File
@@ -0,0 +1,85 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection.ip_lists:
Method ``ip_lists``
-------------------
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
:py:obj:`pass-lists <pass_ip>`.
.. code:: toml
[botdetection.ip_lists]
pass_ip = [
'140.238.172.132', # IPv4 of check.searx.space
'192.168.0.0/16', # IPv4 private network
'fe80::/10' # IPv6 linklocal
]
block_ip = [
'93.184.216.34', # IPv4 of example.org
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
"""
# pylint: disable=unused-argument
from __future__ import annotations
from typing import Tuple
from ipaddress import (
ip_network,
IPv4Address,
IPv6Address,
)
from searx.tools import config
from ._helpers import logger
# The child logger is named after this method (``ip_lists``) so that its
# records are not attributed to the ``ip_limit`` method.
logger = logger.getChild('ip_lists')

SEARXNG_ORG = [
    # https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
    '140.238.172.132',  # IPv4 check.searx.space
    '2603:c022:0:4900::/56',  # IPv6 check.searx.space
]
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
    """Check whether the IP is on the pass-list.

    Unless ``botdetection.ip_lists.pass_searxng_org`` is disabled, the IP is
    first compared with the networks of :py:obj:`SEARXNG_ORG`, after that with
    the members of the ``botdetection.ip_lists.pass_ip`` list.

    :returns: tuple of the result (``bool``) and a message explaining it
    """
    if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
        for entry in SEARXNG_ORG:
            org_net = ip_network(entry, strict=False)
            if real_ip.version == org_net.version and real_ip in org_net:
                return True, f"IP matches {org_net.compressed} in SEARXNG_ORG list."
    return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
    """Check whether the IP is on the block-list, i.e. whether it matches one
    of the members of the ``botdetection.ip_lists.block_ip`` list.

    :returns: tuple of the result (``bool``) and a message explaining it; for
        a blocked IP a hint to contact the maintainer is appended
    """
    block, msg = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
    if not block:
        return block, msg
    return block, msg + " To remove IP from list, please contact the maintainer of the service."
def ip_is_subnet_of_member_in_list(
    real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
    """Check whether ``real_ip`` is part of one of the networks in a list.

    :param real_ip: the IP to look for
    :param list_name: name of the configuration value holding the list of
        IPs / networks
    :param cfg: configuration the list is read from (a missing list is
        handled like an empty list)
    :returns: tuple of the result (``bool``) and a message explaining it

    Members of the list that are not a valid IP or network are logged as
    error and ignored.
    """
    for item in cfg.get(list_name, default=[]):
        try:
            net = ip_network(item, strict=False)
        except ValueError:
            logger.error("invalid IP %s in %s", item, list_name)
            continue
        # an IPv4 address can't be compared with an IPv6 network (and v.v.)
        if real_ip.version == net.version and real_ip in net:
            return True, f"IP matches {net.compressed} in {list_name}."
    return False, f"IP is not a member of an item in the {list_name} list"
+147
View File
@@ -0,0 +1,147 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _limiter src:
Limiter
=======
.. sidebar:: info
The limiter requires a :ref:`Redis <settings redis>` database.
Bot protection / IP rate limitation. The intention of rate limitation is to
limit suspicious requests from an IP. The motivation behind this is the fact
that SearXNG passes through requests from bots and is thus classified as a bot
itself. As a result, the SearXNG engine then receives a CAPTCHA or is blocked
by the search engine (the origin) in some other way.
To avoid blocking, the requests from bots to SearXNG must also be blocked, this
is the task of the limiter. To perform this task, the limiter uses the methods
from the :py:obj:`searx.botdetection`.
To enable the limiter activate:
.. code:: yaml
server:
...
limiter: true # rate limit the number of request on the instance, block some bots
and set the redis-url connection. Check the value, it depends on your redis DB
(see :ref:`settings redis`), by example:
.. code:: yaml
redis:
url: unix:///usr/local/searxng-redis/run/redis.sock?db=0
"""
from __future__ import annotations
from pathlib import Path
from ipaddress import ip_address
import flask
import werkzeug
from searx.tools import config
from searx import logger
from . import (
http_accept,
http_accept_encoding,
http_accept_language,
http_connection,
http_user_agent,
ip_limit,
ip_lists,
)
from ._helpers import (
get_network,
get_real_ip,
dump_request,
)
logger = logger.getChild('botdetection.limiter')

# Module-level cache of the configuration; stays ``None`` until ``get_cfg``
# loads it on the first call.
CFG: config.Config = None  # type: ignore

LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
"""Base configuration (schema) of the botdetection."""

LIMITER_CFG = Path('/etc/searxng/limiter.toml')
"""Local Limiter configuration."""
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
def get_cfg() -> config.Config:
    """Return the limiter configuration.

    The configuration is built from :py:obj:`LIMITER_CFG_SCHEMA` and
    :py:obj:`LIMITER_CFG` on the first call and cached in :py:obj:`CFG`.
    """
    global CFG  # pylint: disable=global-statement
    if CFG is not None:
        return CFG
    CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
    return CFG
def filter_request(request: flask.Request) -> werkzeug.Response | None:
    """Apply the bot detection methods to the request.

    :param request: the incoming request
    :returns: ``None`` if the request passes, otherwise the response that
        should be sent to the client instead of processing the request
    """
    # pylint: disable=too-many-return-statements

    cfg = get_cfg()
    real_ip = ip_address(get_real_ip(request))
    network = get_network(real_ip, cfg)

    if request.path == '/healthz':
        return None

    # link-local

    if network.is_link_local:
        return None

    # block- & pass- lists
    #
    # 1. The IP of the request is first checked against the pass-list; if the IP
    #    matches an entry in the list, the request is not blocked.
    # 2. If no matching entry is found in the pass-list, then a check is made against
    #    the block list; if the IP matches an entry in the list, the request is
    #    blocked.
    # 3. If the IP is not in either list, the request is not blocked.

    match, msg = ip_lists.pass_ip(real_ip, cfg)
    if match:
        logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
        return None

    match, msg = ip_lists.block_ip(real_ip, cfg)
    if match:
        logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
        return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))

    # methods applied on /

    for func in [
        http_user_agent,
    ]:
        val = func.filter_request(network, request, cfg)
        if val is not None:
            return val

    # methods applied on /search

    if request.path == '/search':

        for func in [
            http_accept,
            http_accept_encoding,
            http_accept_language,
            http_connection,
            http_user_agent,
            ip_limit,
        ]:
            val = func.filter_request(network, request, cfg)
            if val is not None:
                return val

    # Log the request that has been filtered (the ``request`` argument, not
    # the global ``flask.request``) and leave the formatting to the logger:
    # the message is only built when DEBUG is enabled.
    logger.debug("OK %s: %s", network, dump_request(request))
    return None
+157
View File
@@ -0,0 +1,157 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Method ``link_token``
---------------------
The ``link_token`` method evaluates a request as :py:obj:`suspicious
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
client. By adding a random component (the token) in the URL, a bot can not send
a ping by request a static URL.
.. note::
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
To get in use of this method a flask URL route needs to be added:
.. code:: python
@app.route('/client<token>.css', methods=['GET', 'POST'])
def client_token(token=None):
link_token.ping(request, token)
return Response('', mimetype='text/css')
And in the HTML template from flask a stylesheet link is needed (the value of
``link_token`` comes from :py:obj:`get_token`):
.. code:: html
<link rel="stylesheet"
href="{{ url_for('client_token', token=link_token) }}"
type="text/css" />
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
"""
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
ip_address,
)
import string
import random
import flask
from searx import logger
from searx import redisdb
from searx.redislib import secret_hash
from ._helpers import (
get_network,
get_real_ip,
)
# Expire time ``get_token`` sets on a newly generated token.
TOKEN_LIVE_TIME = 600
"""Livetime (sec) of limiter's CSS token."""

# Expire time ``ping`` and ``is_suspicious`` set on a ping-key.
PING_LIVE_TIME = 3600
"""Livetime (sec) of the ping-key from a client (request)"""

PING_KEY = 'SearXNG_limiter.ping'
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""

TOKEN_KEY = 'SearXNG_limiter.token'
"""Key for which the current token is stored in the DB"""

logger = logger.getChild('botdetection.link_token')
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
    """Rate the request as *suspicious* (``True``) if no valid ping exists for
    this (client) network.  If a ping exists and ``renew`` is ``True``, the
    expire time of the ping is reset to :py:obj:`PING_LIVE_TIME`.

    Without a redis DB no request is rated as suspicious.
    """
    redis_client = redisdb.client()
    if not redis_client:
        return False

    ping_key = get_ping_key(network, request)
    if redis_client.get(ping_key):
        if renew:
            redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
        logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
        return False

    logger.warning("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
    return True
def ping(request: flask.Request, token: str):
    """Handle a request to the URL ``/client<token>.css``: if the ``token`` is
    valid, a ping-key (see :py:obj:`PING_KEY`) of the client is stored in the
    DB, its expire time is :py:obj:`PING_LIVE_TIME`.

    Nothing is stored without a redis DB or if the token is not valid.
    """
    from . import limiter  # pylint: disable=import-outside-toplevel, cyclic-import

    redis_client = redisdb.client()
    if not redis_client or not token_is_valid(token):
        return

    real_ip = ip_address(get_real_ip(request))
    network = get_network(real_ip, limiter.get_cfg())
    ping_key = get_ping_key(network, request)

    logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
    redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
    """Return the ping-key of the request: :py:obj:`PING_KEY` followed by the
    hash of the network and the Accept-Language and User-Agent headers.  The
    key fits (more or less) to a *WEB-browser session* in a network."""
    headers = request.headers
    fingerprint = network.compressed + headers.get('Accept-Language', '') + headers.get('User-Agent', '')
    return PING_KEY + "[" + secret_hash(fingerprint) + "]"
def token_is_valid(token) -> bool:
    """Return ``True`` if ``token`` is equal to the current token (see
    :py:obj:`get_token`); the result is logged at DEBUG level."""
    is_valid = token == get_token()
    logger.debug("token is valid --> %s", is_valid)
    return is_valid
def get_token() -> str:
    """Return the current token.  If no token is stored in the redis DB, a new
    random token is generated and stored.

    - :py:obj:`TOKEN_LIVE_TIME`
    - :py:obj:`TOKEN_KEY`

    """
    redis_client = redisdb.client()
    if not redis_client:
        # This function is also called when limiter is inactive / no redis DB
        # (see render function in webapp.py)
        return '12345678'

    stored = redis_client.get(TOKEN_KEY)
    if stored:
        return stored.decode('UTF-8')

    alphabet = string.ascii_lowercase + string.digits
    token = ''.join(random.choice(alphabet) for _ in range(16))
    redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
    return token
+73
View File
@@ -0,0 +1,73 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pyright: basic
"""Module for backward compatibility.
"""
# pylint: disable=C,R
__all__ = ('cached_property',)
try:
    from functools import cached_property  # type: ignore

except ImportError:

    # cache_property has been added in py3.8 [1]
    #
    # To support cache_property in py3.7 the implementation from 3.8 has been
    # copied here.  This code can be cleanup with EOL of py3.7.
    #
    # [1] https://docs.python.org/3/library/functools.html#functools.cached_property

    from threading import RLock

    # sentinel: distinguishes "nothing cached yet" from a cached ``None``
    _NOT_FOUND = object()

    class cached_property:
        """Descriptor that computes the value of an attribute on first access
        and stores it in the ``__dict__`` of the instance."""

        def __init__(self, func):
            """Wrap ``func``, the method that computes the value."""
            self.func = func
            self.attrname = None
            self.__doc__ = func.__doc__
            self.lock = RLock()

        def __set_name__(self, owner, name):
            """Remember the attribute name the descriptor is assigned to;
            assigning it to a second, different name is a :py:obj:`TypeError`."""
            if self.attrname is None:
                self.attrname = name
            elif name != self.attrname:
                raise TypeError(
                    "Cannot assign the same cached_property to two different names "
                    f"({self.attrname!r} and {name!r})."
                )

        def __get__(self, instance, owner=None):
            """Return the cached value of ``instance``; it is computed by
            ``self.func`` if it has not been cached before.  Access on the
            class (``instance`` is ``None``) returns the descriptor itself."""
            if instance is None:
                return self
            if self.attrname is None:
                raise TypeError("Cannot use cached_property instance without calling __set_name__ on it.")
            try:
                cache = instance.__dict__
            except AttributeError:  # not all objects have __dict__ (e.g. class defines slots)
                msg = (
                    f"No '__dict__' attribute on {type(instance).__name__!r} "
                    f"instance to cache {self.attrname!r} property."
                )
                raise TypeError(msg) from None
            # first look-up without the lock, the lock is only taken if the
            # value has to be computed
            val = cache.get(self.attrname, _NOT_FOUND)
            if val is _NOT_FOUND:
                with self.lock:
                    # check if another thread filled cache while we awaited lock
                    val = cache.get(self.attrname, _NOT_FOUND)
                    if val is _NOT_FOUND:
                        val = self.func(instance)
                        try:
                            cache[self.attrname] = val
                        except TypeError:
                            msg = (
                                f"The '__dict__' attribute on {type(instance).__name__!r} instance "
                                f"does not support item assignment for caching {self.attrname!r} property."
                            )
                            raise TypeError(msg) from None
            return val
+52
View File
@@ -0,0 +1,52 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module holds the *data* created by::
make data.all
"""
__all__ = [
'ENGINE_TRAITS',
'CURRENCIES',
'USER_AGENTS',
'EXTERNAL_URLS',
'WIKIDATA_UNITS',
'EXTERNAL_BANGS',
'OSM_KEYS_TAGS',
'ENGINE_DESCRIPTIONS',
'ahmia_blacklist_loader',
]
import json
from pathlib import Path
data_dir = Path(__file__).parent


def _load(filename):
    """Return the decoded content of the UTF-8 JSON file *filename* from :py:obj:`data_dir`."""
    json_path = data_dir / filename
    return json.loads(json_path.read_text(encoding='utf-8'))
def ahmia_blacklist_loader():
    """Read ``ahmia_blacklist.txt`` and return its whitespace separated entries.

    According to the upstream description the entries are MD5 values of onion
    names, fetched by::

        searxng_extra/update/update_ahmia_blacklist.py

    This function is used by :py:mod:`searx.plugins.ahmia_filter`.
    """
    blacklist_file = data_dir / 'ahmia_blacklist.txt'
    return blacklist_file.read_text(encoding='utf-8').split()
# Data sets exported by this package.  Each constant is loaded once, when the
# module is imported, by parsing the JSON file of the given name in ``data_dir``.
CURRENCIES = _load('currencies.json')
USER_AGENTS = _load('useragents.json')
EXTERNAL_URLS = _load('external_urls.json')
WIKIDATA_UNITS = _load('wikidata_units.json')
EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
+145
View File
@@ -0,0 +1,145 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Implementations of the framework for the SearXNG engines.
.. hint::
The long term goal is to modularize all implementations of the engine
framework here in this Python package. ToDo:
- move implementations of the :ref:`searx.engines loader` to a new module in
the :py:obj:`searx.enginelib` namespace.
"""
from __future__ import annotations
from typing import List, Callable, TYPE_CHECKING
if TYPE_CHECKING:
from searx.enginelib import traits
class Engine:  # pylint: disable=too-few-public-methods
    """Class of engine instances built from YAML settings.

    Further documentation see :ref:`general engine configuration`.

    .. hint::

       This class is currently never initialized and only used for type hinting.
    """

    # Common options in the engine module

    engine_type: str
    """Type of the engine (:ref:`searx.search.processors`)"""

    paging: bool
    """Engine supports multiple pages."""

    time_range_support: bool
    """Engine supports search time range."""

    safesearch: bool
    """Engine supports SafeSearch"""

    language_support: bool
    """Engine supports languages (locales) search."""

    language: str
    """For an engine, when there is ``language: ...`` in the YAML settings the engine
    does support only this one language:

    .. code:: yaml

      - name: google french
        engine: google
        language: fr
    """

    region: str
    """For an engine, when there is ``region: ...`` in the YAML settings the engine
    does support only this one region:

    .. code:: yaml

      - name: google belgium
        engine: google
        region: fr-BE
    """

    fetch_traits: Callable
    """Function to fetch engine's traits from origin."""

    traits: traits.EngineTraits
    """Traits of the engine."""

    # settings.yml

    categories: List[str]
    """Specifies to which :ref:`engine categories` the engine should be added."""

    name: str
    """Name that will be used across SearXNG to define this engine.  In settings, on
    the result page .."""

    engine: str
    """Name of the python file used to handle requests and responses to and from
    this search engine (file name from :origin:`searx/engines` without
    ``.py``)."""

    enable_http: bool
    """Enable HTTP (by default only HTTPS is enabled)."""

    shortcut: str
    """Code used to execute bang requests (``!foo``)"""

    timeout: float
    """Specific timeout for search-engine."""

    display_error_messages: bool
    """Display error messages on the web UI."""

    proxies: dict
    """Set proxies for a specific engine (YAML):

    .. code:: yaml

       proxies :
         http:  socks5://proxy:port
         https: socks5://proxy:port
    """

    disabled: bool
    """To disable by default the engine, but not deleting it.  It will allow the
    user to manually activate it in the settings."""

    inactive: bool
    """Remove the engine from the settings (*disabled & removed*)."""

    about: dict
    """Additional fields describing the engine.

    .. code:: yaml

       about:
          website: https://example.com
          wikidata_id: Q306656
          official_api_documentation: https://example.com/api-doc
          use_official_api: true
          require_api_key: true
          results: HTML
    """

    using_tor_proxy: bool
    """Using tor proxy (``true``) or not (``false``) for this engine."""

    send_accept_language_header: bool
    """When this option is activated, the language (locale) that is selected by
    the user is used to build and send a ``Accept-Language`` header in the
    request to the origin search engine."""

    tokens: List[str]
    """A list of secret tokens to make this engine *private*, more details see
    :ref:`private engines`."""
+252
View File
@@ -0,0 +1,252 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Engine's traits are fetched from the origin engines and stored in a JSON file
in the *data folder*. Most often traits are languages and region codes and
their mapping from SearXNG's representation to the representation in the origin
search engine. For new traits new properties can be added to the class
:py:class:`EngineTraits`.
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
used.
"""
from __future__ import annotations
import json
import dataclasses
import types
from typing import Dict, Iterable, Union, Callable, Optional, TYPE_CHECKING
from typing_extensions import Literal, Self
from searx import locales
from searx.data import data_dir, ENGINE_TRAITS
if TYPE_CHECKING:
from . import Engine
class EngineTraitsEncoder(json.JSONEncoder):
    """JSON encoder that serializes :class:`EngineTraits` objects, see
    :class:`json.JSONEncoder`."""

    def default(self, o):
        """Return the attribute dictionary of an :class:`EngineTraits` object;
        any other object is handed to the base class."""
        if not isinstance(o, EngineTraits):
            return super().default(o)
        return vars(o)
@dataclasses.dataclass
class EngineTraits:
    """The class is intended to be instantiated for each engine."""

    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a region to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       regions = {
           'fr-BE' : <engine's region name>,
       }

       for key, engine_region in regions.items():
          searxng_region = babel.Locale.parse(key, sep='-')
          ...
    """

    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a language to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       languages = {
           'ca' : <engine's language name>,
       }

       for key, engine_lang in languages.items():
          searxng_lang = babel.Locale.parse(key)
          ...
    """

    all_locale: Optional[str] = None
    """To which locale value SearXNG's ``all`` language is mapped (shown as "Default
    language").
    """

    data_type: Literal['traits_v1'] = 'traits_v1'
    """Data type, default is 'traits_v1'.
    """

    custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core.
    """

    def get_language(self, searxng_locale: str, default=None):
        """Return engine's language string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default language

        The *best fits* rules are implemented in
        :py:obj:`searx.locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)

    def get_region(self, searxng_locale: str, default=None):
        """Return engine's region string that best fits to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default region

        The *best fits* rules are implemented in
        :py:obj:`searx.locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)

    def is_locale_supported(self, searxng_locale: str) -> bool:
        """A *locale* (SearXNG's internal representation) is considered to be
        supported by the engine if the *region* or the *language* is supported
        by the engine.

        For verification the functions :py:func:`EngineTraits.get_region` and
        :py:func:`EngineTraits.get_language` are used.

        :raises TypeError: if :py:obj:`EngineTraits.data_type` is not ``traits_v1``.
        """
        if self.data_type == 'traits_v1':
            return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))

        raise TypeError(f'engine traits of type {self.data_type} is unknown')

    def copy(self):
        """Create a copy of the dataclass object.

        The copy is built from :py:func:`dataclasses.asdict`, so the field
        containers of the copy are independent of the ones in ``self``.
        """
        return EngineTraits(**dataclasses.asdict(self))

    @classmethod
    def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
        """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
        and set properties from the origin engine in the object ``engine_traits``.  If
        function does not exist, ``None`` is returned.
        """

        fetch_traits = getattr(engine, 'fetch_traits', None)
        engine_traits = None

        if fetch_traits:
            engine_traits = cls()
            fetch_traits(engine_traits)
        return engine_traits

    def set_traits(self, engine: Engine):
        """Set traits from self object in a :py:obj:`.Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        :raises TypeError: if :py:obj:`EngineTraits.data_type` is not ``traits_v1``.
        """

        if self.data_type == 'traits_v1':
            self._set_traits_v1(engine)
        else:
            raise TypeError(f'engine traits of type {self.data_type} is unknown')

    def _set_traits_v1(self, engine: Engine):
        """Set a (possibly narrowed) copy of the traits in the engine namespace.

        When the engine has a ``language`` and/or ``region`` attribute, the
        copied traits are reduced to this single language / region.  A value
        that is not part of the traits raises :py:obj:`ValueError`.  The
        attributes ``engine.language_support`` and ``engine.traits`` are set;
        ``self`` is not modified.
        """
        # For an engine, when there is `language: ...` in the YAML settings the engine
        # does support only this one language (region)::
        #
        #   - name: google italian
        #     engine: google
        #     language: it
        #     region: it-IT

        traits = self.copy()

        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        languages = traits.languages
        if hasattr(engine, 'language'):
            if engine.language not in languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))
            traits.languages = {engine.language: languages[engine.language]}

        regions = traits.regions
        if hasattr(engine, 'region'):
            if engine.region not in regions:
                raise ValueError(_msg % (engine.name, 'region', engine.region))
            traits.regions = {engine.region: regions[engine.region]}

        engine.language_support = bool(traits.languages or traits.regions)

        # set the copied & modified traits in engine's namespace
        engine.traits = traits
class EngineTraitsMap(Dict[str, EngineTraits]):
    """Dictionary that maps an engine name to its :class:`EngineTraits`."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""

    def save_data(self):
        """Write this map as JSON into the file :py:obj:`self.ENGINE_TRAITS_FILE`."""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as traits_file:
            json.dump(self, traits_file, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> Self:
        """Build an :class:`EngineTraitsMap` from the items of :py:obj:`ENGINE_TRAITS`."""
        traits_map = cls()
        for engine_name, trait_values in ENGINE_TRAITS.items():
            traits_map[engine_name] = EngineTraits(**trait_values)
        return traits_map

    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Fetch the traits of all registered engines, in order of engine name.

        Engines for which :py:obj:`EngineTraits.fetch_traits` returns ``None``
        are left out.  For each other engine the number of languages and
        regions is reported through the callable ``log``.
        """
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        traits_map = cls()
        for engine_name in sorted(engines.engines):
            fetched = EngineTraits.fetch_traits(engines.engines[engine_name])
            if fetched is None:
                continue
            log("%-20s: SearXNG languages --> %s " % (engine_name, len(fetched.languages)))
            log("%-20s: SearXNG regions   --> %s" % (engine_name, len(fetched.regions)))
            traits_map[engine_name] = fetched
        return traits_map

    def set_traits(self, engine: Engine | types.ModuleType):
        """Set traits in a :py:obj:`Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """
        if engine.name in self:
            selected = self[engine.name]
        elif engine.engine in self:
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.yml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            selected = self[engine.engine]
        else:
            # no traits known for this engine: fall back to empty v1 traits
            selected = EngineTraits(data_type='traits_v1')

        selected.set_traits(engine)
+57
View File
@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
1337x
"""
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
# about: metadata describing the origin site of this engine
about = {
    "website": 'https://1337x.to/',
    "wikidata_id": 'Q28134166',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}
# base URL of the site; response() resolves the result links against it
url = 'https://1337x.to/'
# template of the search URL, filled with the quoted query and the page number by request()
search_url = url + 'search/{search_term}/{pageno}/'
categories = ['files']
# the page number is part of the search URL
paging = True
def request(query, params):
    """Set ``params['url']`` to the 1337x search URL for *query* and return *params*.

    The query is percent-quoted and the page number is taken from
    ``params['pageno']``.
    """
    search_term = quote(query)
    page_number = params['pageno']
    params['url'] = search_url.format(search_term=search_term, pageno=page_number)
    return params
def response(resp):
    """Turn the rows of the result table in ``resp.text`` into torrent results.

    Each row yields a dictionary with the keys ``url``, ``title``, ``seed``,
    ``leech``, ``filesize`` and ``template`` (always ``torrent.html``).
    """
    dom = html.fromstring(resp.text)
    torrents = []

    for row in eval_xpath_list(dom, '//table[contains(@class, "table-list")]/tbody//tr'):
        link = eval_xpath_getindex(row, './td[contains(@class, "name")]/a[2]/@href', 0)
        href = urljoin(url, link)
        title = extract_text(eval_xpath(row, './td[contains(@class, "name")]/a[2]'))
        seed = extract_text(eval_xpath(row, './/td[contains(@class, "seeds")]'))
        leech = extract_text(eval_xpath(row, './/td[contains(@class, "leeches")]'))

        # the size cell is expected to hold exactly two tokens: number and unit
        size_text = extract_text(eval_xpath(row, './/td[contains(@class, "size")]/text()'))
        size_value, size_unit = size_text.split()
        filesize = get_torrent_size(size_value, size_unit)

        torrent = {
            'url': href,
            'title': title,
            'seed': seed,
            'leech': leech,
            'filesize': filesize,
            'template': 'torrent.html',
        }
        torrents.append(torrent)

    return torrents
+77
View File
@@ -0,0 +1,77 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=invalid-name
"""9GAG (social media)"""
from json import loads
from datetime import datetime
from urllib.parse import urlencode
# about: metadata describing the origin site of this engine
about = {
    "website": 'https://9gag.com/',
    "wikidata_id": 'Q277421',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
categories = ['social media']
paging = True
# template of the search URL, ``{query}`` is replaced by the URL-encoded arguments built in request()
search_url = "https://9gag.com/v1/search-posts?{query}"
# step used by request() to turn the page number into the offset argument ``c``
page_size = 10
def request(query, params):
    """Set ``params['url']`` to the 9GAG search URL for *query* and return *params*.

    The offset argument ``c`` is ``(pageno - 1) * page_size``.
    """
    offset = (params['pageno'] - 1) * page_size
    url_args = urlencode({'query': query, 'c': offset})
    params['url'] = search_url.format(query=url_args)
    return params
def response(resp):
    """Convert the JSON in ``resp.text`` into image / video results and suggestions.

    Posts of type ``Photo`` become ``images.html`` results, posts of type
    ``Animated`` become ``videos.html`` results, posts of any other type are
    dropped.  Every entry of the optional ``tags`` list adds a suggestion.
    """
    data = loads(resp.text)['data']
    results = []

    for post in data['posts']:
        post_type = post['type']
        images = post['images']

        # Use the (not cropped) 700px image as thumbnail unless it is taller
        # than 400px, in that case fall back to the Facebook thumbnail.
        if images['image700']['height'] > 400:
            thumbnail = images['imageFbThumbnail']['url']
        else:
            thumbnail = images['image700']['url']

        if post_type not in ('Photo', 'Animated'):
            continue

        item = {
            'template': 'images.html' if post_type == 'Photo' else 'videos.html',
            'url': post['url'],
            'title': post['title'],
            'content': post['description'],
            'publishedDate': datetime.utcfromtimestamp(post['creationTs']),
        }
        if post_type == 'Photo':
            item['img_src'] = images['image700']['url']
            item['thumbnail_src'] = thumbnail
        else:
            item['thumbnail'] = thumbnail
            item['iframe_src'] = images.get('image460sv', {}).get('url')
        results.append(item)

    if 'tags' in data:
        results.extend({'suggestion': tag['key']} for tag in data['tags'])

    return results
+253
View File
@@ -0,0 +1,253 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
:py:obj:`engine_shortcuts`.
usage::
load_engines( settings['engines'] )
"""
from __future__ import annotations
import sys
import copy
from os.path import realpath, dirname
from typing import TYPE_CHECKING, Dict
import types
import inspect
from searx import logger, settings
from searx.utils import load_module
if TYPE_CHECKING:
from searx.enginelib import Engine
# logger of this package, parent of the per-engine loggers created in set_loggers()
logger = logger.getChild('engines')
# folder of this package; load_engine() loads the engine modules from here
ENGINE_DIR = dirname(realpath(__file__))
# Defaults for the namespace of an engine module: update_engine_attributes()
# sets a deep copy of each value for every attribute the engine does not have.
ENGINE_DEFAULT_ARGS = {
    # Common options in the engine module
    "engine_type": "online",
    "paging": False,
    "time_range_support": False,
    "safesearch": False,
    # settings.yml
    "categories": ["general"],
    "enable_http": False,
    "shortcut": "-",
    "timeout": settings["outgoing"]["request_timeout"],
    "display_error_messages": True,
    "disabled": False,
    "inactive": False,
    "about": {},
    "using_tor_proxy": False,
    "send_accept_language_header": False,
    "tokens": [],
}
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'


# Registries filled by register_engine() and reset by load_engines():
# category name --> list of engines
categories = {'general': []}
# engine name --> engine namespace
engines: Dict[str, Engine | types.ModuleType] = {}
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).
::
engine_shortcuts[engine.shortcut] = engine.name
:meta hide-value:
"""
def check_engine_module(module: types.ModuleType):
    """Probe the engine module for unintentional name collisions.

    Such collisions can for example be caused by import statements in the
    engine module.  Currently only the name ``network`` is probed, see
    https://github.com/searxng/searxng/issues/762#issuecomment-1605323861

    :raises TypeError: if ``module.network`` exists, is truthy and is a module.
    """
    network_attr = getattr(module, 'network', None)
    if not network_attr or not inspect.ismodule(network_attr):
        return
    raise TypeError(
        f'type of {module.__name__}.network is a module ({network_attr.__name__}), expected a string'
    )
def load_engine(engine_data: dict) -> Engine | types.ModuleType | None:
    """Load engine from ``engine_data``.

    :param dict engine_data:  Attributes from YAML ``settings:engines/<engine>``
    :return: initialized namespace of the ``<engine>``.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with values from ``engine_data``
    3. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`
       (only for attributes that are still missing)
    4. set the traits of the engine in the namespace

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.

    This function also returns ``None`` if initialization of the namespace fails
    for one of the following reasons:

    - the ``name`` or the ``engine`` field is missing
    - engine name contains underscore
    - the module of the engine can't be loaded
    - required attribute is not set :py:func:`is_missing_required_attributes`

    An engine name that is not lowercase is converted to lowercase (with a
    warning) and written back to ``engine_data['name']``.  A fatal exception
    while loading the module (e.g. ``SyntaxError`` or ``ImportError``) is
    logged and terminates the process with ``sys.exit(1)``.

    :raises TypeError: from :py:func:`check_engine_module` on a name collision.
    """
    # pylint: disable=too-many-return-statements

    engine_name = engine_data.get('name')
    if engine_name is None:
        logger.error('An engine does not have a "name" field')
        return None
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None

    if engine_name.lower() != engine_name:
        logger.warning('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    # load_module
    module_name = engine_data.get('engine')
    if module_name is None:
        logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
        return None
    try:
        engine = load_module(module_name + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        # these exceptions are treated as fatal: log and stop the process
        logger.exception('Fatal exception in engine "{}"'.format(module_name))
        sys.exit(1)
    except BaseException:
        # any other exception only skips this engine
        logger.exception('Cannot load engine "{}"'.format(module_name))
        return None

    check_engine_module(engine)
    update_engine_attributes(engine, engine_data)
    update_attributes_for_tor(engine)

    # avoid cyclic imports
    # pylint: disable=import-outside-toplevel
    from searx.enginelib.traits import EngineTraitsMap

    trait_map = EngineTraitsMap.from_data()
    trait_map.set_traits(engine)

    if not is_engine_active(engine):
        return None

    if is_missing_required_attributes(engine):
        return None

    set_loggers(engine, engine_name)

    # an engine without any category shown as tab is added to the default category
    if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
        engine.categories.append(DEFAULT_CATEGORY)

    return engine
def set_loggers(engine, engine_name):
    """Set ``engine.logger`` and give every loaded ``searx.engines*`` module
    that has no ``logger`` attribute yet a child logger named after the module."""
    # set the logger for engine
    engine.logger = logger.getChild(engine_name)

    # The engine may have loaded some other engines, make sure their loggers
    # are initialized too.  Iterate over a copy of sys.modules to avoid
    # "RuntimeError: dictionary changed size during iteration"
    # see https://github.com/python/cpython/issues/89516
    # and https://docs.python.org/3.10/library/sys.html#sys.modules
    for module_name, module in sys.modules.copy().items():
        if not module_name.startswith("searx.engines"):
            continue
        if module_name == "searx.engines.__init__" or hasattr(module, "logger"):
            continue
        short_name = module_name.rpartition(".")[2]
        module.logger = logger.getChild(short_name)  # type: ignore
def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
    """Copy the settings from ``engine_data`` into the engine namespace, then
    add the defaults from :py:obj:`ENGINE_DEFAULT_ARGS` for missing attributes.

    - ``categories`` given as string is split at the commas (items stripped)
    - ``about`` is merged into an already existing ``engine.about``
    """
    # set engine attributes from engine_data
    for key, value in engine_data.items():
        if key == 'categories':
            if isinstance(value, str):
                value = [item.strip() for item in value.split(',')]
            engine.categories = value  # type: ignore
        elif key == 'about' and hasattr(engine, 'about'):
            engine.about = {**engine.about, **engine_data['about']}  # type: ignore
        else:
            setattr(engine, key, value)

    # set default attributes
    for default_name, default_value in ENGINE_DEFAULT_ARGS.items():
        if hasattr(engine, default_name):
            continue
        # deep copy: mutable defaults must not be shared between engines
        setattr(engine, default_name, copy.deepcopy(default_value))
def update_attributes_for_tor(engine: Engine | types.ModuleType):
    """For an engine that uses Tor and has an ``onion_url``: point
    ``search_url`` to the onion service and extend the timeout by the
    ``extra_proxy_timeout`` of the outgoing settings (default 0)."""
    if not (using_tor_proxy(engine) and hasattr(engine, 'onion_url')):
        return
    search_path = getattr(engine, 'search_path', '')
    engine.search_url = engine.onion_url + search_path  # type: ignore
    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore
def is_missing_required_attributes(engine):
    """Return ``True`` if at least one required attribute of the engine is ``None``.

    An attribute is required when its name doesn't start with ``_``
    (underline).  Each missing attribute is logged as an error.
    """
    found_missing = False
    for attr_name in dir(engine):
        if attr_name.startswith('_'):
            continue
        if getattr(engine, attr_name) is not None:
            continue
        logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, attr_name))
        found_missing = True
    return found_missing
def using_tor_proxy(engine: Engine | types.ModuleType):
    """Return a truthy value if the outgoing settings or the engine itself
    declare to use Tor."""
    global_setting = settings['outgoing'].get('using_tor_proxy')
    if global_setting:
        return global_setting
    return getattr(engine, 'using_tor_proxy', False)
def is_engine_active(engine: Engine | types.ModuleType):
    """Return ``False`` for an engine that is marked inactive or that is in
    the ``onions`` category while Tor is not used, otherwise ``True``."""
    if engine.inactive is True:
        return False

    # only onion engines depend on the Tor setting
    if 'onions' not in engine.categories:
        return True
    return bool(using_tor_proxy(engine))
def register_engine(engine: Engine | types.ModuleType):
    """Add the engine to :py:obj:`engines`, :py:obj:`engine_shortcuts` and to
    the list of each of its categories in :py:obj:`categories`.

    An already registered name or shortcut is logged as configuration error
    and terminates the process with ``sys.exit(1)``.
    """
    name = engine.name
    if name in engines:
        logger.error('Engine config error: ambiguous name: {0}'.format(name))
        sys.exit(1)
    engines[name] = engine

    shortcut = engine.shortcut
    if shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(shortcut))
        sys.exit(1)
    engine_shortcuts[shortcut] = name

    for category in engine.categories:
        members = categories.setdefault(category, [])
        members.append(engine)
def load_engines(engine_list):
    """Reset the registries, then load and register each engine of the list.

    usage: ``engine_list = settings['engines']``

    :return: the (module level) dictionary :py:obj:`engines`
    """
    for registry in (engines, engine_shortcuts, categories):
        registry.clear()
    categories['general'] = []

    for engine_data in engine_list:
        loaded = load_engine(engine_data)
        if not loaded:
            continue
        register_engine(loaded)
    return engines
+80
View File
@@ -0,0 +1,80 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Ahmia (Onions)
"""
from urllib.parse import urlencode, urlparse, parse_qs
from lxml.html import fromstring
from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
# about
# about: metadata describing the origin site of this engine
about = {
    "website": 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion',
    "wikidata_id": 'Q18693938',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}
# engine config
categories = ['onions']
paging = True
# number of results response() returns per page (the result list is sliced locally)
page_size = 10
# search url
search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}'
time_range_support = True
# maps the time range name to the value request() sends in the ``d`` URL argument
time_range_dict = {'day': 1, 'week': 7, 'month': 30}
# xpaths (evaluated in response())
results_xpath = '//li[@class="result"]'
url_xpath = './h4/a/@href'
title_xpath = './h4/a[1]'
content_xpath = './/p[1]'
correction_xpath = '//*[@id="didYouMean"]//a'
number_of_results_xpath = '//*[@id="totalResults"]'
def request(query, params):
    """Set ``params['url']`` to the Ahmia search URL for *query* and return *params*.

    When ``params['time_range']`` is a key of :py:obj:`time_range_dict` the
    mapped value is appended as URL argument ``d``.
    """
    params['url'] = search_url.format(query=urlencode({'q': query}))

    selected_range = params['time_range']
    if selected_range in time_range_dict:
        time_args = urlencode({'d': time_range_dict[selected_range]})
        params['url'] += '&' + time_args
    return params
def response(resp):
    """Parse Ahmia's result page into results, corrections and a result count.

    All results are contained in one HTML page, therefore the page requested
    in ``resp.search_params['pageno']`` (default 1) is cut out of the result
    list in slices of :py:obj:`page_size`.

    :return: list of result dictionaries; onion results are marked with
        ``is_onion``, spelling corrections use the key ``correction`` and the
        total number of results (if it can be parsed) ``number_of_results``.
    """
    results = []
    dom = fromstring(resp.text)

    # trim results so there's not way too many at once
    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
    all_results = eval_xpath_list(dom, results_xpath)
    trimmed_results = all_results[first_result_index : first_result_index + page_size]

    # get results
    for result in trimmed_results:
        # remove ahmia url and extract the actual url for the result
        raw_url = extract_url(eval_xpath_list(result, url_xpath, min_len=1), search_url)
        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]

        title = extract_text(eval_xpath(result, title_xpath))
        content = extract_text(eval_xpath(result, content_xpath))

        results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True})

    # get spelling corrections
    for correction in eval_xpath_list(dom, correction_xpath):
        results.append({'correction': extract_text(correction)})

    # get number of results
    number_of_results = eval_xpath(dom, number_of_results_xpath)
    if number_of_results:
        try:
            results.append({'number_of_results': int(extract_text(number_of_results))})
        except (TypeError, ValueError):
            # The counter is optional: skip it when the text is missing or not
            # an integer, but do not swallow unrelated exceptions.
            pass

    return results
+187
View File
@@ -0,0 +1,187 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""`Anna's Archive`_ is a free non-profit online shadow library metasearch
engine providing access to a variety of book resources (also via IPFS), created
by a team of anonymous archivists (AnnaArchivist_).
.. _Anna's Archive: https://annas-archive.org/
.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive
Configuration
=============
The engine has the following additional settings:
- :py:obj:`aa_content`
- :py:obj:`aa_ext`
- :py:obj:`aa_sort`
With these options a SearXNG maintainer is able to configure **additional**
engines for specific searches in Anna's Archive.  For example an engine to search
for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.
.. code:: yaml
- name: annas articles
engine: annas_archive
shortcut: aaa
aa_content: 'journal_article'
aa_ext: 'pdf'
aa_sort: 'newest'
Implementations
===============
"""
from typing import List, Dict, Any, Optional
from urllib.parse import quote
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS
# about
# about: metadata describing the origin site of this engine
about: Dict[str, Any] = {
    "website": "https://annas-archive.org/",
    "wikidata_id": "Q115288326",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}
# engine dependent config
categories: List[str] = ["files"]
paging: bool = False
# search-url (request() appends ``/search?...``, _get_result() prefixes the result links)
base_url: str = "https://annas-archive.org"
aa_content: str = ""
"""Anna's search form field **Content** / possible values::
journal_article, book_any, book_fiction, book_unknown, book_nonfiction,
book_comic, magazine, standards_document
To not filter use an empty string (default).
"""
# sent by request() in the ``sort`` URL argument, validated by init()
aa_sort: str = ''
"""Sort Anna's results, possible values::
newest, oldest, largest, smallest
To sort by *most relevant* use an empty string (default)."""
# sent by request() in the ``ext`` URL argument, validated by init()
aa_ext: str = ''
"""Filter Anna's results by a file ending. Common filters for example are
``pdf`` and ``epub``.
.. note::
Anna's Archive is a beta release: Filter results by file extension does not
really work on Anna's Archive.
"""
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Validate the settings ``aa_content``, ``aa_sort`` and ``aa_ext``.

    A non-empty setting has to be one of the values stored in the custom
    traits of ``annas archive``, otherwise a :py:obj:`ValueError` is raised.
    """
    traits = EngineTraits(**ENGINE_TRAITS['annas archive'])

    configured = (
        ('content', aa_content),
        ('sort', aa_sort),
        ('ext', aa_ext),
    )
    for setting, value in configured:
        # an empty string means "do not filter / default order"
        if value and value not in traits.custom[setting]:
            raise ValueError(f'invalid setting {setting}: {value}')
def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
    """Set ``params["url"]`` to Anna's search URL for *query* and return *params*.

    The language argument is looked up in the engine's traits (fallback is the
    ``all_locale`` of the traits); the arguments ``content``, ``ext`` and
    ``sort`` come from the engine settings.
    """
    quoted_query = quote(query)
    lang = traits.get_language(params["language"], traits.all_locale) or ''  # type: ignore
    search_args = f"/search?lang={lang}&content={aa_content}&ext={aa_ext}&sort={aa_sort}&q={quoted_query}"
    params["url"] = base_url + search_args
    return params
def response(resp) -> List[Dict[str, Optional[str]]]:
    """Parse Anna's result page, including the positions hidden in SGML comments."""
    dom = html.fromstring(resp.text)

    visible_items = eval_xpath_list(dom, '//main//div[contains(@class, "h-[125]")]/a')
    results: List[Dict[str, Optional[str]]] = [_get_result(item) for item in visible_items]

    # The rendering of the WEB page is very strange; except the first position
    # all other positions of Anna's result page are enclosed in SGML comments.
    # These comments are *uncommented* by some JS code, see query of class
    # '.js-scroll-hidden' in Anna's HTML template:
    #   https://annas-software.org/AnnaArchivist/annas-archive/-/blob/main/allthethings/templates/macros/md5_list.html
    for hidden in eval_xpath_list(dom, '//main//div[contains(@class, "js-scroll-hidden")]'):
        comment_text = hidden.xpath('./comment()')[0].text
        results.append(_get_result(html.fromstring(comment_text)))

    return results
def _get_result(item):
    """Build a ``paper.html`` result from the HTML element of one position."""
    link = item.xpath('./@href')[0]
    title = extract_text(eval_xpath(item, './/h3/text()[1]'))
    publisher = extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]'))
    author = extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))
    content = extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]'))
    image = item.xpath('.//img/@src')[0]

    return {
        'template': 'paper.html',
        'url': base_url + link,
        'title': title,
        'publisher': publisher,
        'authors': [author],
        'content': content,
        'img_src': image,
    }
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and other search arguments from Anna's search form.

    The languages are taken from the options of the ``lang`` selection, the
    custom traits ``content``, ``ext`` and ``sort`` from the selections of the
    same name.

    :raises RuntimeError: if the response from the search page is not OK.
    """
    # pylint: disable=import-outside-toplevel

    import babel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
    from searx.locales import language_tag

    custom_fields = ('content', 'ext', 'sort')

    engine_traits.all_locale = ''
    for field in custom_fields:
        engine_traits.custom[field] = []

    resp = get(base_url + '/search')
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from Anna's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    # supported language codes

    # engine code --> code babel is able to parse (currently no exceptions)
    lang_map = {}
    skipped_codes = ('', '_empty', 'nl-BE', 'und')

    for option in eval_xpath_list(dom, "//form//select[@name='lang']//option"):
        eng_lang = option.get("value")
        if eng_lang in skipped_codes:
            continue
        try:
            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
        except babel.UnknownLocaleError:
            # silently ignore unknown languages
            continue

        sxng_lang = language_tag(locale)
        known = engine_traits.languages.get(sxng_lang)
        if not known:
            engine_traits.languages[sxng_lang] = eng_lang
        elif known != eng_lang:
            # first mapping wins, a differing one is only reported
            print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, known, eng_lang))

    for field in custom_fields:
        for option in eval_xpath_list(dom, f"//form//select[@name='{field}']//option"):
            engine_traits.custom[field].append(option.get("value"))
+62
View File
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""APKMirror
"""
# pylint: disable=invalid-name
from urllib.parse import urlencode
from lxml import html
from searx.utils import (
eval_xpath_list,
eval_xpath_getindex,
extract_text,
)
# Static description of the engine (website, API usage, result format).
about = {
"website": 'https://www.apkmirror.com',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files', 'apps']
paging = True  # request() passes params['pageno'] on as the ``page`` URL argument
time_range_support = False
# search-url
base_url = 'https://www.apkmirror.com'
# URL template: ``{pageno}`` and ``{query}`` are substituted in request()
search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'
def request(query, params):
    """Store the APKMirror search URL for *query* in ``params['url']``.

    The page number is read from ``params['pageno']``.  Returns ``params``.
    """
    page_number = params['pageno']
    encoded_query = urlencode({'s': query})
    params['url'] = search_url.format(pageno=page_number, query=encoded_query)

    logger.debug("query_url --> %s", params['url'])
    return params
def response(resp):
    """Extract the app rows of an APKMirror result page.

    Each result gets the (absolute) download URL, the link text as title and
    the (absolute) URL of the app icon.
    """
    dom = html.fromstring(resp.text)
    app_rows = eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']")

    results = []
    for row in app_rows:
        anchor = eval_xpath_getindex(row, './/h5/a', 0)
        results.append(
            {
                'url': base_url + anchor.attrib.get('href') + '#downloads',
                'title': extract_text(anchor),
                'img_src': base_url + eval_xpath_getindex(row, './/img/@src', 0),
            }
        )
    return results
+57
View File
@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Apple App Store
"""
from json import loads
from urllib.parse import urlencode
from dateutil.parser import parse
# Static description of the engine (website, API documentation, result format).
about = {
"website": 'https://www.apple.com/app-store/',
"wikidata_id": 'Q368215',
"official_api_documentation": (
'https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/'
'iTuneSearchAPI/UnderstandingSearchResults.html#//apple_ref/doc/uid/TP40017632-CH8-SW1'
),
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
categories = ['files', 'apps']
safesearch = True  # request() maps params['safesearch'] to the ``explicit`` argument
# URL template of the search endpoint; ``{query}`` is substituted in request()
search_url = 'https://itunes.apple.com/search?{query}'
def request(query, params):
    """Store the iTunes search URL for *query* in ``params['url']``.

    Explicit content is requested only if safesearch is off
    (``params['safesearch']`` is not greater than 0).  Returns ``params``.
    """
    explicit = "No" if params['safesearch'] > 0 else "Yes"
    api_args = {'term': query, 'media': 'software', 'explicit': explicit}
    params['url'] = search_url.format(query=urlencode(api_args))
    return params
def response(resp):
    """Convert the JSON answer of the iTunes search API into result dicts."""
    apps = loads(resp.text)['results']
    return [
        {
            'url': app['trackViewUrl'],
            'title': app['trackName'],
            'content': app['description'],
            'img_src': app['artworkUrl100'],
            'publishedDate': parse(app['currentVersionReleaseDate']),
            'author': app['sellerName'],
        }
        for app in apps
    ]
+113
View File
@@ -0,0 +1,113 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Apple Maps"""
from json import loads
from time import time
from urllib.parse import urlencode
from searx.network import get as http_get
from searx.engines.openstreetmap import get_key_label
# Static description of the engine (website, API usage, result format).
about = {
"website": 'https://www.apple.com/maps/',
"wikidata_id": 'Q276101',
"official_api_documentation": None,
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# Cached access token and the timestamp of its last refresh, see obtain_token()
token = {'value': '', 'last_updated': None}
categories = ['map']
paging = False
# URL template of the search endpoint; ``{query}`` is substituted in request()
search_url = "https://api.apple-mapkit.com/v1/search?{query}&mkjsVersion=5.72.53"
def obtain_token():
    """Refresh the cached MapKit access token (best effort) and return the cache.

    A token is requested from DuckDuckGo and exchanged at Apple's bootstrap
    endpoint for an access token.  On success ``token['value']`` and
    ``token['last_updated']`` are updated.  On any error the cache is left
    untouched, so the next call of :py:obj:`request` tries again.
    """
    # read the clock only once, then align to the start of the 30 min window
    now = time()
    update_time = now - (now % 1800)
    try:
        # use duckduckgo's mapkit token
        token_response = http_get('https://duckduckgo.com/local.js?get_mk_token=1', timeout=2.0)
        actual_token = http_get(
            'https://cdn.apple-mapkit.com/ma/bootstrap?apiVersion=2&mkjsVersion=5.72.53&poi=1',
            timeout=2.0,
            headers={'Authorization': 'Bearer ' + token_response.text},
        )
        token['value'] = loads(actual_token.text)['authInfo']['access_token']
        token['last_updated'] = update_time
    except Exception:  # pylint: disable=broad-except
        # Deliberately best effort: network and parsing errors are ignored.
        # ``Exception`` (instead of a bare ``except``) keeps KeyboardInterrupt
        # and SystemExit working.
        pass
    return token
def request(query, params):
    """Store search URL and ``Authorization`` header for *query* in ``params``.

    The cached access token is refreshed first when it is older than 1800
    seconds (or has never been fetched).  Returns ``params``.
    """
    token_age = time() - (token['last_updated'] or 0)
    if token_age > 1800:
        obtain_token()

    url_args = urlencode({'q': query, 'lang': params['language']})
    params['url'] = search_url.format(query=url_args)
    params['headers'] = {'Authorization': 'Bearer ' + token['value']}
    return params
def response(resp):
    """Convert the JSON answer of the search API into ``map.html`` results.

    Phone number and first website of a place (if present) are added as
    links, labelled in the language of the search request.
    """
    places = loads(resp.text)['results']
    user_language = resp.search_params['language']

    results = []
    for place in places:
        # [south, north, west, east] of the displayed region, if there is one
        boundingbox = None
        if 'displayMapRegion' in place:
            region = place['displayMapRegion']
            boundingbox = [region[edge] for edge in ('southLat', 'northLat', 'westLng', 'eastLng')]

        links = []
        if 'telephone' in place:
            phone = place['telephone']
            links.append(
                {
                    'label': get_key_label('phone', user_language),
                    'url': 'tel:' + phone,
                    'url_label': phone,
                }
            )
        if place.get('urls'):
            website = place['urls'][0]
            links.append(
                {
                    'label': get_key_label('website', user_language),
                    'url': website,
                    'url_label': website,
                }
            )

        name = place['name']
        center = place['center']
        latitude = center['lat']
        longitude = center['lng']

        results.append(
            {
                'template': 'map.html',
                'type': place.get('poiCategory'),
                'title': name,
                'links': links,
                'latitude': latitude,
                'longitude': longitude,
                'url': place['placecardUrl'],
                'boundingbox': boundingbox,
                'geojson': {'type': 'Point', 'coordinates': [longitude, latitude]},
                'address': {
                    'name': name,
                    'house_number': place.get('subThoroughfare'),
                    'road': place.get('thoroughfare'),
                    'locality': place.get('locality'),
                    'postcode': place.get('postCode'),
                    'country': place.get('country'),
                },
            }
        )
    return results
+152
View File
@@ -0,0 +1,152 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Arch Linux Wiki
~~~~~~~~~~~~~~~
This implementation does not use an official API: MediaWiki provides an API, but
Arch Wiki blocks access to it.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode, urljoin, urlparse
import lxml
import babel
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag
# Declarations for type checkers only: ``logger`` and ``traits`` are not
# assigned in this module -- presumably they are provided by the engine loader
# (TODO confirm).
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# Static description of the engine (website, API usage, result format).
about = {
"website": 'https://wiki.archlinux.org/',
"wikidata_id": 'Q101445877',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['it', 'software wikis']
paging = True
# default wiki host, used when no language specific wiki is known
main_wiki = 'wiki.archlinux.org'
def request(query, params):
    """Store the search URL of the wiki matching the request locale in ``params``.

    Host and title of the search page are looked up in the ``custom`` traits
    by the language part of ``params['searxng_locale']``.  On the main wiki
    the language name (in parentheses) is appended to the query.
    Returns ``params``.
    """
    sxng_lang = params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    title: str = traits.custom['title'].get(sxng_lang, 'Special:Search')  # type: ignore
    offset = (params['pageno'] - 1) * 20

    script_path = '/index.php?'
    if netloc == main_wiki:
        eng_lang: str = traits.get_language(sxng_lang, 'English')  # type: ignore
        query += ' (' + eng_lang + ')'
    elif netloc == 'wiki.archlinuxcn.org':
        script_path = '/wzh/index.php?'

    url_args = urlencode(
        {
            'search': query,
            'title': title,
            'limit': 20,
            'offset': offset,
            'profile': 'default',
        }
    )
    params['url'] = 'https://' + netloc + script_path + url_args
    return params
def response(resp):
    """Extract the hits (URL, title, content) from a wiki search result page."""
    dom = lxml.html.fromstring(resp.text)  # type: ignore

    # get the base URL for the language in which request was made
    sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'

    results = []
    for hit in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
        heading_link = eval_xpath_getindex(hit, './/div[@class="mw-search-result-heading"]/a', 0)
        snippet = extract_text(hit.xpath('.//div[@class="searchresult"]'))
        results.append(
            {
                'url': urljoin(base_url, heading_link.get('href')),  # type: ignore
                'title': extract_text(heading_link),
                'content': snippet,
            }
        )
    return results
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from the Arch Linux wiki.  The host of the wiki of a
    language is stored in a :py:obj:`custom field
    <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``).  For wikis
    on another host the translated ``title`` argument of the search page is
    stored as well.

    .. code:: python

       "custom": {
         "wiki_netloc": {
           "de": "wiki.archlinux.de",
            # ...
           "zh": "wiki.archlinuxcn.org"
         }
         "title": {
           "de": "Spezial:Suche",
            # ...
           "zh": "Special:\u641c\u7d22"
         },
       },

    """
    # pylint: disable=import-outside-toplevel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    wiki_hosts = engine_traits.custom['wiki_netloc'] = {}
    search_titles = engine_traits.custom['title'] = {}

    title_map = {
        'de': 'Spezial:Suche',
        'fa': 'ویژه:جستجو',
        'ja': '特別:検索',
        'zh': 'Special:搜索',
    }

    resp = get('https://wiki.archlinux.org/')
    if not resp.ok:  # type: ignore
        print("ERROR: response from wiki.archlinix.org is not OK.")

    dom = lxml.html.fromstring(resp.text)  # type: ignore
    for lang_link in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):

        locale = babel.Locale.parse(lang_link.get('lang'), sep='-')
        # zh_Hans --> zh
        sxng_tag = language_tag(locale).split('_')[0]

        netloc = urlparse(lang_link.get('href')).netloc
        if netloc != 'wiki.archlinux.org':
            # wikis on another host need the translated name of the search page
            title = title_map.get(sxng_tag)
            if not title:
                print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
                continue
            wiki_hosts[sxng_tag] = netloc
            search_titles[sxng_tag] = title  # type: ignore

        eng_tag = extract_text(eval_xpath_list(lang_link, ".//span"))
        engine_traits.languages[sxng_tag] = eng_tag  # type: ignore

    engine_traits.languages['en'] = 'English'
+69
View File
@@ -0,0 +1,69 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""The Art Institute of Chicago
Explore thousands of artworks from The Art Institute of Chicago.
* https://artic.edu
"""
from json import loads
from urllib.parse import urlencode
# Static description of the engine (website, API documentation, result format).
about = {
"website": 'https://www.artic.edu',
"wikidata_id": 'Q239303',
"official_api_documentation": 'http://api.artic.edu/docs/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
categories = ['images']
paging = True
nb_per_page = 20  # sent as ``limit`` argument by request()
# base of the search URL, request() appends the encoded arguments
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
# base of the image URLs built in response()
image_api = 'https://www.artic.edu/iiif/2/'
def request(query, params):
    """Store the artwork search URL for *query* in ``params['url']``.

    The page is read from ``params['pageno']``, the page size is
    ``nb_per_page``.  Returns ``params``.
    """
    api_args = {
        'q': query,
        'page': params['pageno'],
        'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
        'limit': nb_per_page,
    }
    params['url'] = search_api + urlencode(api_args)

    logger.debug("query_url --> %s", params['url'])
    return params
def response(resp):
    """Convert the JSON answer of the search API into ``images.html`` results.

    Artworks without an ``image_id`` are skipped.
    """
    artworks = loads(resp.text)['data']
    return [
        {
            'url': 'https://artic.edu/artworks/%(id)s' % artwork,
            'title': artwork['title'] + " (%(date_display)s) // %(artist_display)s" % artwork,
            'content': artwork['medium_display'],
            'author': ', '.join(artwork['artist_titles']),
            'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % artwork,
            'img_format': artwork['dimensions'],
            'template': 'images.html',
        }
        for artwork in artworks
        if artwork['image_id']
    ]
+109
View File
@@ -0,0 +1,109 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
ArXiV (Scientific preprints)
"""
from lxml import etree
from lxml.etree import XPath
from datetime import datetime
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
# about
about = {
"website": 'https://arxiv.org',
"wikidata_id": 'Q118398',
"official_api_documentation": 'https://arxiv.org/help/api',
"use_official_api": True,
"require_api_key": False,
"results": 'XML-RSS',
}
categories = ['science', 'scientific publications']
paging = True
# URL template; ``{query}``, ``{offset}`` and ``{number_of_results}`` are
# substituted in request()
base_url = (
'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}'
)
# engine dependent config
number_of_results = 10
# xpaths
# namespace prefixes used by the XPath expressions below
arxiv_namespaces = {
"atom": "http://www.w3.org/2005/Atom",
"arxiv": "http://arxiv.org/schemas/atom",
}
# xpath_entry selects the entries of the feed, all other expressions are
# evaluated relative to one entry in response()
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
def request(query, params):
    """Store the arXiv API URL for *query* in ``params['url']``.

    The start offset is derived from ``params['pageno']`` and
    ``number_of_results``.  Returns ``params``.
    """
    # pylint: disable=import-outside-toplevel
    from urllib.parse import quote

    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    # The query must be percent-encoded: an unescaped ``&``, ``#``, ``+`` or
    # ``%`` would otherwise change or truncate the URL arguments.
    string_args = {
        'query': quote(query),
        'offset': offset,
        'number_of_results': number_of_results,
    }
    params['url'] = base_url.format(**string_args)
    return params
def response(resp):
    """Convert the Atom feed of the arXiv API into ``paper.html`` results.

    Title, URL, abstract and publication date are mandatory parts of an entry;
    DOI, PDF link, journal reference and comment are ``None`` if missing.
    """

    def first_or_none(node, xpath_spec):
        # first match of xpath_spec below node or None if there is no match
        return eval_xpath_getindex(node, xpath_spec, 0, default=None)

    def text_or_none(element):
        return element.text if element is not None else None

    results = []
    dom = etree.fromstring(resp.content)

    for entry in eval_xpath_list(dom, xpath_entry):
        title = eval_xpath_getindex(entry, xpath_title, 0).text
        url = eval_xpath_getindex(entry, xpath_id, 0).text
        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
        authors = [name.text for name in eval_xpath_list(entry, xpath_author_name)]

        doi = text_or_none(first_or_none(entry, xpath_doi))

        pdf_link = first_or_none(entry, xpath_pdf)
        pdf_url = pdf_link.attrib.get('href') if pdf_link is not None else None

        journal = text_or_none(first_or_none(entry, xpath_journal))
        tags = [str(term) for term in eval_xpath(entry, xpath_category)]
        comments = text_or_none(first_or_none(entry, xpath_comment))

        published = eval_xpath_getindex(entry, xpath_published, 0).text
        publishedDate = datetime.strptime(published, '%Y-%m-%dT%H:%M:%SZ')

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'publishedDate': publishedDate,
                'content': abstract,
                'doi': doi,
                'authors': authors,
                'journal': journal,
                'tags': tags,
                'comments': comments,
                'pdf_url': pdf_url,
            }
        )
    return results
+95
View File
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bandcamp (Music)
@website https://bandcamp.com/
@provide-api no
@results HTML
@parse url, title, content, publishedDate, iframe_src, thumbnail
"""
from urllib.parse import urlencode, urlparse, parse_qs
from dateutil.parser import parse as dateparse
from lxml import html
from searx.utils import (
eval_xpath_getindex,
eval_xpath_list,
extract_text,
)
# about
about = {
"website": 'https://bandcamp.com/',
"wikidata_id": 'Q545966',
"official_api_documentation": 'https://bandcamp.com/developer',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['music']
paging = True
base_url = "https://bandcamp.com/"
# path template appended to base_url; ``{query}`` and ``{page}`` are
# substituted in request()
search_string = 'search?{query}&page={page}'
# URL template of the embedded player; ``{type}`` and ``{result_id}`` are
# substituted in response()
iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small"
def request(query, params):
    '''Pre-request callback: store the search URL in ``params['url']``.

    Keys of ``params``, as listed by the original author::

        method  : POST/GET
        headers : {}
        data    : {} # if method == POST
        url     : ''
        category: 'search category'
        pageno  : 1 # number of the requested page

    Only ``pageno`` is read here.  Returns ``params``.
    '''
    encoded_query = urlencode({'q': query})
    params['url'] = base_url + search_string.format(query=encoded_query, page=params['pageno'])
    return params
def response(resp):
    '''Post-response callback: extract the hits of a Bandcamp result page.

    Each result has URL, title and content.  Release date, thumbnail and (for
    albums and tracks) the URL of the embedded player are added if available.
    List items without an item URL are skipped.

    resp: requests response object
    '''
    results = []
    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'):

        link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None)
        if link is None:
            continue

        title = result.xpath('.//div[@class="heading"]/a/text()')
        content = result.xpath('.//div[@class="subhead"]/text()')
        new_result = {
            "url": extract_text(link),
            "title": extract_text(title),
            "content": extract_text(content),
        }

        # The expression has to be relative (``.//``): an absolute ``//``
        # searches the whole document and assigns the first release date of
        # the page to every result.
        date = eval_xpath_getindex(result, './/div[@class="released"]/text()', 0, default=None)
        if date:
            new_result["publishedDate"] = dateparse(date.replace("released ", ""))

        thumbnail = result.xpath('.//div[@class="art"]/img/@src')
        if thumbnail:
            new_result['img_src'] = thumbnail[0]

        result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
        itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()
        # only albums and tracks have an embedded player
        if itemtype in ("album", "track"):
            new_result["iframe_src"] = iframe_src.format(type=itemtype, result_id=result_id)

        results.append(new_result)
    return results
+112
View File
@@ -0,0 +1,112 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
BASE (Scholar publications)
"""
from urllib.parse import urlencode
from lxml import etree
from datetime import datetime
import re
from searx.utils import searx_useragent
# about
about = {
"website": 'https://base-search.net',
"wikidata_id": 'Q448335',
"official_api_documentation": 'https://api.base-search.net/',
"use_official_api": True,
"require_api_key": False,
"results": 'XML',
}
categories = ['science']
# URL template; ``{query}``, ``{hits}`` and ``{offset}`` are substituted in
# request()
base_url = (
'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'
+ '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
)
# engine dependent config
paging = True
number_of_results = 10
# shortcuts for advanced search
# (each key found in the user query is replaced by its value in request())
shorcut_dict = {
# user-friendly keywords
'format:': 'dcformat:',
'author:': 'dccreator:',
'collection:': 'dccollection:',
'hdate:': 'dchdate:',
'contributor:': 'dccontributor:',
'coverage:': 'dccoverage:',
'date:': 'dcdate:',
'abstract:': 'dcdescription:',
'urls:': 'dcidentifier:',
'language:': 'dclanguage:',
'publisher:': 'dcpublisher:',
'relation:': 'dcrelation:',
'rights:': 'dcrights:',
'source:': 'dcsource:',
'subject:': 'dcsubject:',
'title:': 'dctitle:',
'type:': 'dcdctype:',
}
def request(query, params):
    """Store the BASE API URL for *query* in ``params`` and set the User-Agent.

    User-friendly keywords (``author:``, ``date:``, ...) are translated to the
    field names of the API, see ``shorcut_dict``.  The offset is derived from
    ``params['pageno']`` and ``number_of_results``.  Returns ``params``.
    """
    # Replace shortcuts with API advanced search keywords.  All keywords are
    # replaced in one pass and only at a word boundary.  Replacing them one
    # after the other rewrites the output of an earlier replacement:
    # ``hdate:`` --> ``dchdate:`` --> ``dchdcdate:``.
    shortcut_re = re.compile(r'\b(?:' + '|'.join(re.escape(key) for key in shorcut_dict) + ')')
    query = shortcut_re.sub(lambda match: shorcut_dict[match.group(0)], query)

    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    string_args = {
        'query': urlencode({'query': query}),
        'offset': offset,
        'hits': number_of_results,
    }
    params['url'] = base_url.format(**string_args)

    params['headers']['User-Agent'] = searx_useragent()
    return params
def response(resp):
    """Convert the XML answer of the BASE API into results.

    Each ``./result/doc`` element becomes one result with URL, title and a
    content of at most 300 characters.  ``publishedDate`` is added only when
    the date of the document can be parsed.  Documents without a link or
    title are skipped.
    """
    results = []

    search_results = etree.XML(resp.content)

    for entry in search_results.xpath('./result/doc'):
        content = "No description available"
        # Reset for every document: otherwise a document without link or title
        # inherits the values of the previous one (or raises a NameError if it
        # is the first).
        url = None
        title = None
        date = None

        for item in entry:
            field = item.attrib.get("name")
            if field == "dcdate":
                date = item.text
            elif field == "dctitle":
                title = item.text
            elif field == "dclink":
                url = item.text
            elif field == "dcdescription" and item.text:
                content = item.text[:300]
                if len(item.text) > 300:
                    content += "..."

        if url is None or title is None:
            continue

        # dates returned by the BASE API come in several formats
        publishedDate = None
        if date:
            for date_format in ('%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y'):
                try:
                    publishedDate = datetime.strptime(date, date_format)
                    break
                except ValueError:
                    # try the next format
                    continue

        # A document without a (parsable) date gets no publishedDate, as
        # before.
        res_dict = {'url': url, 'title': title, 'content': content}
        if publishedDate is not None:
            res_dict['publishedDate'] = publishedDate

        results.append(res_dict)

    return results
+337
View File
@@ -0,0 +1,337 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This is the implementation of the Bing-WEB engine. Some of this
implementations are shared by other engines:
- :ref:`bing images engine`
- :ref:`bing news engine`
- :ref:`bing videos engine`
On the `preference page`_ Bing offers a lot of languages and regions (see section
'Search results languages' and 'Country/region'). However, the abundant choice
does not correspond to reality, where Bing has a full-text indexer only for a
limited number of languages. By example: you can select a language like Māori
but you never get a result in this language.
What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
to be completely correct either (if you take a closer look you will find some
inaccuracies there too):
- :py:obj:`searx.engines.bing.bing_traits_url`
- :py:obj:`searx.engines.bing_videos.bing_traits_url`
- :py:obj:`searx.engines.bing_images.bing_traits_url`
- :py:obj:`searx.engines.bing_news.bing_traits_url`
.. _preference page: https://www.bing.com/account/general
.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
"""
# pylint: disable=too-many-branches, invalid-name
from typing import TYPE_CHECKING
import datetime
import re
import uuid
from urllib.parse import urlencode
from lxml import html
import babel
import babel.languages
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
from searx.locales import language_tag, region_tag
from searx.enginelib.traits import EngineTraits
# Declarations for type checkers only: ``logger`` and ``traits`` are not
# assigned in this module -- presumably they are provided by the engine loader
# (TODO confirm).
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# Static description of the engine (website, API documentation, result format).
about = {
"website": 'https://www.bing.com',
"wikidata_id": 'Q182496',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
send_accept_language_header = True
"""Bing tries to guess user's language and territory from the HTTP
Accept-Language. Optional the user can select a search-language (can be
different to the UI language) and a region (market code)."""
# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True
# maps params['safesearch'] to the ADLT value used in set_bing_cookies()
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT
base_url = 'https://www.bing.com/search'
"""Bing (Web) search URL"""
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
"""Bing (Web) search API description"""
def _get_offset_from_pageno(pageno):
return (pageno - 1) * 10 + 1
def set_bing_cookies(params, engine_language, engine_region, SID):
    """Store the cookies ``_EDGE_V``, ``_EDGE_S``, ``_EDGE_CD`` and
    ``SRCHHPGUSR`` in ``params['cookies']``.

    The cookies carry the search region (market code), the language, the
    session id *SID* and the safe search level from ``params['safesearch']``.
    """
    # pylint: disable=invalid-name

    cookies = params['cookies']
    cookies['_EDGE_V'] = '1'

    market = engine_region.lower()  # search region: zh-cn
    ui_language = engine_language.lower()  # UI: en-us

    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
    cookies['_EDGE_S'] = '&'.join(
        [
            'F=1',
            'SID=%s' % SID,
            'mkt=%s' % market,
            'ui=%s' % ui_language,
        ]
    )
    logger.debug("cookie _EDGE_S=%s", cookies['_EDGE_S'])

    # "_EDGE_CD": "m=zh-tw",
    cookies['_EDGE_CD'] = '&'.join(['m=%s' % market, 'u=%s' % ui_language]) + ';'
    logger.debug("cookie _EDGE_CD=%s", cookies['_EDGE_CD'])

    # Trying to set ADLT cookie here seems not to have any effect, I assume
    # there is some age verification by a cookie (and/or session ID) needed,
    # to disable the SafeSearch.
    adult_filter = safesearch_types.get(params['safesearch'], 'DEMOTE')
    cookies['SRCHHPGUSR'] = '&'.join(['SRCHLANG=%s' % engine_language, 'ADLT=%s' % adult_filter])
    logger.debug("cookie SRCHHPGUSR=%s", cookies['SRCHHPGUSR'])
def request(query, params):
"""Assemble a Bing-Web request.

Sets the Bing cookies (see :py:obj:`set_bing_cookies`), builds the URL
arguments (search term, ``cvid``, paging arguments and an optional time range
filter) and stores the final URL in ``params['url']``.  For pages after the
first one a ``Referer`` header is added as well.  Returns ``params``.

NOTE(review): the indentation of this block is lost in this file, so the
nesting of the statements following ``if page > 1:`` can't be read from the
code -- verify before changing the paging arguments.
"""
# region / language codes of the engine for the requested locale, with
# 'en-US' / 'en' as fallback
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
engine_language = traits.get_language(params['searxng_locale'], 'en')
# two ids built from uuid1: SID goes into the cookies, CVID into the URL
SID = uuid.uuid1().hex.upper()
CVID = uuid.uuid1().hex.upper()
set_bing_cookies(params, engine_language, engine_region, SID)
# build URL query
# ---------------
# query term
page = int(params.get('pageno', 1))
query_params = {
# fmt: off
'q': query,
'pq': query,
'cvid': CVID,
'qs': 'n',
'sp': '-1'
# fmt: on
}
# page
if page > 1:
# the referer is the URL built from the arguments collected so far
referer = base_url + '?' + urlencode(query_params)
params['headers']['Referer'] = referer
logger.debug("headers.Referer --> %s", referer)
query_params['first'] = _get_offset_from_pageno(page)
if page == 2:
query_params['FORM'] = 'PERE'
elif page > 2:
query_params['FORM'] = 'PERE%s' % (page - 2)
filters = ''
if params['time_range']:
query_params['filt'] = 'custom'
if params['time_range'] == 'day':
filters = 'ex1:"ez1"'
elif params['time_range'] == 'week':
filters = 'ex1:"ez2"'
elif params['time_range'] == 'month':
filters = 'ex1:"ez3"'
elif params['time_range'] == 'year':
# a year is given as a range of day numbers counted from 1970-01-01
epoch_1970 = datetime.date(1970, 1, 1)
today_no = (datetime.date.today() - epoch_1970).days
filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
params['url'] = base_url + '?' + urlencode(query_params)
if filters:
# the filter is appended as it is, it does not pass urlencode()
params['url'] = params['url'] + '&filters=' + filters
return params
def response(resp):
    """Parse a Bing-Web result page.

    Returns the list of results (URL, title, content) followed by one item
    with the ``number_of_results``.  Bing redirect URLs
    (``https://www.bing.com/ck/a?...``) are replaced by the URL shown to the
    user if it is complete, otherwise they are resolved by additional HTTP
    requests.  A result whose redirect can't be resolved keeps ``url`` set to
    ``None``.  An empty list is returned if the requested page lies beyond the
    number of results reported by Bing.
    """
    # pylint: disable=too-many-locals,import-outside-toplevel

    from searx.network import Request, multi_requests  # see https://github.com/searxng/searxng/issues/762

    # Markers of a URL shortened by Bing: HORIZONTAL ELLIPSIS (U+2026), three
    # dots and the breadcrumb separator (U+203A).  Testing for an empty string
    # instead is a bug: ``'' not in s`` is always False, so the shown URL would
    # never be used.
    shortened_markers = ('\u2026', '...', '\u203a')

    results = []
    result_len = 0

    dom = html.fromstring(resp.text)

    # Bing URLs to resolve and the position of their result in ``results``
    url_to_resolve = []
    url_to_resolve_index = []

    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):

        link = eval_xpath_getindex(result, './/h2/a', 0, None)
        if link is None:
            continue
        url = link.attrib.get('href')
        title = extract_text(link)

        content = eval_xpath(result, '(.//p)[1]')
        for p in content:
            # Make sure that the element is free of <a href> links
            for e in p.xpath('.//a'):
                e.getparent().remove(e)
        content = extract_text(content)

        # get the real URL either using the URL shown to user or following the Bing URL
        if url.startswith('https://www.bing.com/ck/a?'):
            url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
            # Bing can shorten the URL either at the end or in the middle of the string
            if (
                url_cite
                and url_cite.startswith('https://')
                and not any(marker in url_cite for marker in shortened_markers)
            ):
                # no need for an additional HTTP request
                url = url_cite
            else:
                # resolve the URL with an additional HTTP request
                url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
                url_to_resolve_index.append(len(results))
                url = None  # stays None if the HTTP Bing redirect can't be resolved

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # resolve all Bing redirections in parallel
    request_list = [
        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
    ]
    response_list = multi_requests(request_list)
    for result_index, redirect_response in zip(url_to_resolve_index, response_list):
        if isinstance(redirect_response, Exception):
            continue
        # a response without redirect target is handled like a failed request
        location = redirect_response.headers.get('location')
        if location:
            results[result_index]['url'] = location

    # get number_of_results
    try:
        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
        if "-" in result_len_container:
            # Remove the part "from-to" for paginated request ...
            result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]

        result_len_container = re.sub('[^0-9]', '', result_len_container)
        if len(result_len_container) > 0:
            result_len = int(result_len_container)

    except Exception as e:  # pylint: disable=broad-except
        logger.debug('result error :\n%s', e)

    if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
        return []

    results.append({'number_of_results': result_len})
    return results
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Web."""
    # Market codes are read from the 3rd column of the first table, language
    # codes from the 2nd column of the third table.  The country codes
    # ('//table[2]/tbody/tr/td[2]') are not used.
    _fetch_traits(
        engine_traits,
        bing_traits_url,
        xpath_language_codes='//table[3]/tbody/tr/td[2]',
        xpath_market_codes='//table[1]/tbody/tr/td[3]',
    )
def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
    """Fetch language and market codes from the Bing documentation at *url*.

    The table cells selected by *xpath_language_codes* are mapped to SearXNG
    language tags and stored in ``engine_traits.languages``.  The cells
    selected by *xpath_market_codes* are mapped to SearXNG region tags and
    stored in ``engine_traits.regions``; the market ``en-WW`` becomes
    ``engine_traits.all_locale``.  Codes unknown to babel and conflicting
    mappings are reported on stdout and skipped.
    """
    # pylint: disable=too-many-locals,import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    # insert alias to map from a language (zh) to a language + script (zh_Hans)
    engine_traits.languages['zh'] = 'zh-hans'

    resp = get(url)
    if not resp.ok:  # type: ignore
        # the message used to name "peertube" (copy & paste error)
        print("ERROR: response from bing is not OK.")

    dom = html.fromstring(resp.text)  # type: ignore

    # languages

    map_lang = {'jp': 'ja'}
    for td in eval_xpath(dom, xpath_language_codes):
        eng_lang = td.text

        if eng_lang in ('en-gb', 'pt-br'):
            # language 'en' is already in the list and a language 'en-gb' can't
            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
            continue

        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
        except babel.UnknownLocaleError:
            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
            continue

        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang

    # regions (aka "market codes")

    map_region = {
        'en-ID': 'id_ID',
        'no-NO': 'nb_NO',
    }
    for td in eval_xpath(dom, xpath_market_codes):
        eng_region = td.text
        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')

        if eng_region == 'en-WW':
            engine_traits.all_locale = eng_region
            continue

        try:
            sxng_tag = region_tag(babel.Locale.parse(babel_region))
        except babel.UnknownLocaleError:
            print("ERROR: region (%s) is unknown by babel" % (eng_region))
            continue

        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_region:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
            continue
        engine_traits.regions[sxng_tag] = eng_region
+132
View File
@@ -0,0 +1,132 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bing-Images: description see :py:obj:`searx.engines.bing`.
"""
# pylint: disable=invalid-name
from typing import TYPE_CHECKING
import uuid
import json
from urllib.parse import urlencode
from lxml import html
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import (
set_bing_cookies,
_fetch_traits,
)
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
# Declarations for type checkers only: ``logger`` and ``traits`` are not
# assigned in this module -- presumably they are provided by the engine loader
# (TODO confirm).
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://www.bing.com/images',
"wikidata_id": 'Q182496',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['images', 'web']
paging = True
safesearch = True
time_range_support = True
base_url = 'https://www.bing.com/images/async'
"""Bing (Images) search URL"""
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'
"""Bing (Images) search API description"""
# time range --> maximum age in minutes, used for the ``qft`` argument in
# request()
time_map = {
# fmt: off
'day': 60 * 24,
'week': 60 * 24 * 7,
'month': 60 * 24 * 31,
'year': 60 * 24 * 365,
# fmt: on
}
def request(query, params):
    """Assemble a Bing-Images request.

    Sets the Bing cookies on ``params`` and stores the search URL in
    ``params['url']``.  Returns the (modified) ``params``.
    """
    locale = params['searxng_locale']
    region = traits.get_region(locale, 'en-US')
    language = traits.get_language(locale, 'en')

    session_id = uuid.uuid1().hex.upper()
    set_bing_cookies(params, language, region, session_id)

    # build URL query
    # - example: https://www.bing.com/images/async?q=foo&first=155&count=35
    #
    # to keep the page arithmetic simple, a page always holds 35 images
    page_size = 35
    first = (int(params.get('pageno', 1)) - 1) * page_size + 1
    args = [
        ('q', query),
        ('async', 'content'),
        ('first', first),
        ('count', page_size),
    ]

    # time range
    # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
    time_range = params['time_range']
    if time_range:
        args.append(('qft', 'filterui:age-lt%s' % time_map[time_range]))

    params['url'] = base_url + '?' + urlencode(args)
    return params
def response(resp):
    """Parse a Bing-Images result page.

    Every ``<li>`` of the ``dgControl_list`` that carries an ``a.iusc`` anchor
    is turned into one ``images.html`` result.  List items without such an
    anchor are skipped.

    :param resp: HTTP response, only ``resp.text`` is read
    :return: list of result dicts
    """
    results = []
    dom = html.fromstring(resp.text)

    for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):

        # attribute ``m`` of the anchor holds the image metadata as JSON;
        # evaluate the XPath only once and reuse the match
        raw_metadata = result.xpath('.//a[@class="iusc"]/@m')
        if not raw_metadata:
            continue
        metadata = json.loads(raw_metadata[0])

        title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
        img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip()
        source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()

        results.append(
            {
                'template': 'images.html',
                'url': metadata['purl'],
                'thumbnail_src': metadata['turl'],
                'img_src': metadata['murl'],
                'content': metadata['desc'],
                'title': title,
                'source': source,
                'img_format': img_format,
            }
        )

    return results
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Images."""
    # The market codes are read from the first table and the language codes
    # from the third table of the page behind ``bing_traits_url``.
    _fetch_traits(
        engine_traits,
        bing_traits_url,
        '//table[3]/tbody/tr/td[2]',  # language codes
        '//table[1]/tbody/tr/td[3]',  # market codes
    )
+150
View File
@@ -0,0 +1,150 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bing-News: description see :py:obj:`searx.engines.bing`.
"""
# pylint: disable=invalid-name
from typing import TYPE_CHECKING
import uuid
from urllib.parse import urlencode
from lxml import html
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import (
set_bing_cookies,
_fetch_traits,
)
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://www.bing.com/news',
"wikidata_id": 'Q2878637',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'RSS',
}
# engine dependent config
categories = ['news']
paging = True
time_range_support = True
time_map = {
'day': '4',
'week': '8',
'month': '9',
}
"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the
difference between *last day* and *last week* in the result list is only marginal.
"""
base_url = 'https://www.bing.com/news/infinitescrollajax'
"""Bing (News) search URL"""
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'
"""Bing (News) search API description"""
mkt_alias = {
'zh': 'en-WW',
'zh-CN': 'en-WW',
}
"""Bing News has an official market code 'zh-CN' but we won't get a result with
this market code. For 'zh' and 'zh-CN' we better use the *Worldwide aggregate*
market code (en-WW).
"""
def request(query, params):
    """Assemble a Bing-News request.

    Sets the Bing cookies on ``params`` and stores the search URL in
    ``params['url']``.  Returns the (modified) ``params``.
    """
    sxng_locale = params['searxng_locale']
    engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
    engine_language = traits.get_language(sxng_locale, 'en')

    SID = uuid.uuid1().hex.upper()
    set_bing_cookies(params, engine_language, engine_region, SID)

    # build URL query
    #
    # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1
    query_params = {
        # fmt: off
        'q': query,
        'InfiniteScroll': 1,
        # to simplify the page count lets use 10 news items per page
        'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1,
        # fmt: on
    }

    if params['time_range']:
        # The filter has to be sent as  qft=interval="7"  --  urlencode()
        # already emits the ``qft=`` key, so the value must not repeat it.
        query_params['qft'] = 'interval="%s"' % time_map.get(params['time_range'], '9')

    params['url'] = base_url + '?' + urlencode(query_params)
    return params
def response(resp):
    """Get response from Bing-News.

    An empty list is returned when the response is not OK or has no body.
    News items that carry no ``url`` attribute are skipped, a missing
    ``data-author`` attribute results in ``author: None``.

    :param resp: HTTP response, ``resp.ok`` and ``resp.text`` are read
    :return: list of result dicts
    """
    results = []

    if not resp.ok or not resp.text:
        return results

    dom = html.fromstring(resp.text)

    for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'):

        # one malformed item must not abort the whole result page with an
        # IndexError: an item without target URL is simply dropped
        url_attr = newsitem.xpath('./@url')
        if not url_attr:
            continue
        url = url_attr[0]

        title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip()
        content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip()
        author = (newsitem.xpath('./@data-author') or [None])[0]
        metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip()

        thumbnail = None
        img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src')
        if img_src:
            thumbnail = 'https://www.bing.com/' + img_src[0]

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'img_src': thumbnail,
                'author': author,
                'metadata': metadata,
            }
        )

    return results
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-News.

    The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the
    first table says *"query parameter when calling the Video Search API."*,
    that is why the fourth table *"News Category API markets"* is used for the
    market codes.
    """
    _fetch_traits(
        engine_traits,
        bing_traits_url,
        '//table[3]/tbody/tr/td[2]',  # language codes
        '//table[4]/tbody/tr/td[3]',  # market codes
    )
+128
View File
@@ -0,0 +1,128 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
"""
# pylint: disable=invalid-name
from typing import TYPE_CHECKING
import uuid
import json
from urllib.parse import urlencode
from lxml import html
from searx.enginelib.traits import EngineTraits
from searx.engines.bing import (
set_bing_cookies,
_fetch_traits,
)
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://www.bing.com/videos',
"wikidata_id": 'Q4914152',
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['videos', 'web']
paging = True
safesearch = True
time_range_support = True
base_url = 'https://www.bing.com/videos/asyncv2'
"""Bing (Videos) async search URL."""
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'
"""Bing (Video) search API description"""
time_map = {
# fmt: off
'day': 60 * 24,
'week': 60 * 24 * 7,
'month': 60 * 24 * 31,
'year': 60 * 24 * 365,
# fmt: on
}
def request(query, params):
    """Assemble a Bing-Videos request.

    Sets the Bing cookies on ``params`` and stores the search URL in
    ``params['url']``.  Returns the (modified) ``params``.
    """
    locale = params['searxng_locale']
    region = traits.get_region(locale, 'en-US')
    language = traits.get_language(locale, 'en')

    session_id = uuid.uuid1().hex.upper()
    set_bing_cookies(params, language, region, session_id)

    # build URL query
    #
    # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
    #
    # to keep the page arithmetic simple, a page always holds 35 items
    page_size = 35
    first = (int(params.get('pageno', 1)) - 1) * page_size + 1
    args = [
        ('q', query),
        ('async', 'content'),
        ('first', first),
        ('count', page_size),
    ]

    # time range
    #
    # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080'  '&form=VRFLTR'
    time_range = params['time_range']
    if time_range:
        args.append(('form', 'VRFLTR'))
        args.append(('qft', ' filterui:videoage-lt%s' % time_map[time_range]))

    params['url'] = base_url + '?' + urlencode(args)
    return params
def response(resp):
    """Parse a Bing-Videos result page into a list of ``videos.html`` results.

    :param resp: HTTP response, only ``resp.text`` is read
    :return: list of result dicts
    """

    def _to_item(node):
        # attribute ``vrhm`` holds the video metadata as JSON
        metadata = json.loads(node.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
        info = ' - '.join(node.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
        content = '{0} - {1}'.format(metadata['du'], info)
        thumbnail = node.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]
        return {
            'url': metadata['murl'],
            'thumbnail': thumbnail,
            'title': metadata.get('vt', ''),
            'content': content,
            'template': 'videos.html',
        }

    dom = html.fromstring(resp.text)
    video_nodes = dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]')
    return [_to_item(node) for node in video_nodes]
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Videos."""
    # The market codes are read from the first table and the language codes
    # from the third table of the page behind ``bing_traits_url``.
    _fetch_traits(
        engine_traits,
        bing_traits_url,
        '//table[3]/tbody/tr/td[2]',  # language codes
        '//table[1]/tbody/tr/td[3]',  # market codes
    )
+419
View File
@@ -0,0 +1,419 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Brave supports the categories listed in :py:obj:`brave_category` (General,
news, videos, images). The support of :py:obj:`paging` and :py:obj:`time range
<time_range_support>` is limited (see remarks).
Configured ``brave`` engines:
.. code:: yaml
- name: brave
engine: brave
...
brave_category: search
time_range_support: true
paging: true
- name: brave.images
engine: brave
...
brave_category: images
- name: brave.videos
engine: brave
...
brave_category: videos
- name: brave.news
engine: brave
...
brave_category: news
.. _brave regions:
Brave regions
=============
Brave uses two-digit tags for the regions like ``ca`` while SearXNG deals with
locales. To get a mapping, all *official de-facto* languages of the Brave
region are mapped to regions in SearXNG (see :py:obj:`babel
<babel.languages.get_official_languages>`):
.. code:: python
"regions": {
..
"en-CA": "ca",
"fr-CA": "ca",
..
}
.. note::
The language (aka region) support of Brave's index is limited to very basic
languages. The search results for languages like Chinese or Arabic are of
low quality.
.. _brave languages:
Brave languages
===============
Brave's language support is limited to the UI (menus, area local notations,
etc). Brave's index only seems to support a locale, but it does not seem to
support any languages in its index. The choice of available languages is very
small (and it's not clear to me where the difference in UI is when switching
from en-us to en-ca or en-gb).
In the :py:obj:`EngineTraits object <searx.enginelib.traits.EngineTraits>` the
UI languages are stored in a custom field named ``ui_lang``:
.. code:: python
"custom": {
"ui_lang": {
"ca": "ca",
"de-DE": "de-de",
"en-CA": "en-ca",
"en-GB": "en-gb",
"en-US": "en-us",
"es": "es",
"fr-CA": "fr-ca",
"fr-FR": "fr-fr",
"ja-JP": "ja-jp",
"pt-BR": "pt-br",
"sq-AL": "sq-al"
}
},
Implementations
===============
"""
from typing import TYPE_CHECKING
import re
from urllib.parse import (
urlencode,
urlparse,
parse_qs,
)
import chompjs
from lxml import html
from searx import locales
from searx.utils import (
extract_text,
eval_xpath_list,
eval_xpath_getindex,
)
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://search.brave.com/',
"wikidata_id": 'Q22906900',
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
base_url = "https://search.brave.com/"
categories = []
brave_category = 'search'
"""Brave supports common web-search, video search, image search and news search.
- ``search``: Common WEB search
- ``videos``: search for videos
- ``images``: search for images
- ``news``: search for news
"""
brave_spellcheck = False
"""Brave supports some kind of spell checking. When activated, Brave tries to
fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In
the UI of Brave the user gets warned about this, since we can not warn the user
in SearXNG, the spellchecking is disabled by default.
"""
send_accept_language_header = True
paging = False
"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI
category All)."""
safesearch = True
safesearch_map = {2: 'strict', 1: 'moderate', 0: 'off'} # cookie: safesearch=off
time_range_support = False
"""Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI
category All)."""
time_range_map = {
'day': 'pd',
'week': 'pw',
'month': 'pm',
'year': 'py',
}
def request(query, params):
    """Assemble a Brave request.

    The search URL is stored in ``params['url']``; safesearch, region and UI
    language are passed to Brave by cookies.  ``params`` is modified in
    place, nothing is returned.
    """
    # Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787
    params['headers']['Accept-Encoding'] = 'gzip, deflate'

    args = {'q': query}
    if brave_spellcheck:
        args['spellcheck'] = '1'

    # paging and time-range are only sent in category 'search'
    if brave_category == 'search':
        page_offset = params.get('pageno', 1) - 1
        if page_offset:
            args['offset'] = page_offset
        time_filter = time_range_map.get(params['time_range'])
        if time_filter:
            args['tf'] = time_filter

    params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"

    # set properties in the cookies
    cookies = params['cookies']
    cookies['safesearch'] = safesearch_map.get(params['safesearch'], 'off')
    # the useLocation is IP based, we use cookie 'country' for the region
    cookies['useLocation'] = '0'
    cookies['summarizer'] = '0'

    region = traits.get_region(params['searxng_locale'], 'all')
    cookies['country'] = region.split('-')[-1].lower()  # type: ignore

    cookies['ui_lang'] = locales.get_engine_locale(params['searxng_locale'], traits.custom["ui_lang"], 'en-us')

    logger.debug("cookies %s", params['cookies'])
def response(resp):
    """Dispatch the response to the parser of the configured ``brave_category``.

    Category ``search`` is parsed from the HTML.  For all other categories
    the JS object assigned to ``const data`` in the page is parsed.

    :raises ValueError: for an unsupported ``brave_category``
    """
    if brave_category == 'search':
        return _parse_search(resp)

    # take the first line with the assignment, drop the assignment itself and
    # the last character of the stripped line
    marker = "const data = "
    datastr = next(
        (line.replace(marker, "").strip()[:-1] for line in resp.text.split("\n") if marker in line),
        "",
    )

    json_resp = chompjs.parse_js_object(datastr)[1]['data']['body']['response']

    if brave_category == 'news':
        return _parse_news(json_resp['news'])

    parsers = {'images': _parse_images, 'videos': _parse_videos}
    if brave_category in parsers:
        return parsers[brave_category](json_resp)

    raise ValueError(f"Unsupported brave category: {brave_category}")
def _parse_search(resp):
    """Parse the HTML of Brave's WEB search (category ``search``).

    An ``div.answer`` element, if present, is added as *answer*.  Every
    ``snippet`` element with a result link and a title becomes one result;
    snippets lacking one of both are skipped.  A snippet with a video tag is
    rendered with the ``videos.html`` template when an iframe URL can be
    derived from the result URL, otherwise the image of the video tag is used
    as ``img_src``.

    :param resp: HTTP response, only ``resp.text`` is read
    :return: list of result dicts
    """
    result_list = []
    dom = html.fromstring(resp.text)

    # Existence has to be tested with ``is not None``: the truth value of a
    # lxml element is False when the element has no child elements, a
    # text-only tag would be ignored by a plain ``if tag:``.
    answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None)
    if answer_tag is not None:
        result_list.append({'answer': extract_text(answer_tag)})

    # xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
    xpath_results = '//div[contains(@class, "snippet")]'

    for result in eval_xpath_list(dom, xpath_results):

        url = eval_xpath_getindex(result, './/a[@class="result-header"]/@href', 0, default=None)
        title_tag = eval_xpath_getindex(result, './/span[@class="snippet-title"]', 0, default=None)
        if not url or title_tag is None:
            continue

        content_tag = eval_xpath_getindex(result, './/p[@class="snippet-description"]', 0, default='')
        img_src = eval_xpath_getindex(result, './/img[@class="thumb"]/@src', 0, default='')

        item = {
            'url': url,
            'title': extract_text(title_tag),
            'content': extract_text(content_tag),
            'img_src': img_src,
        }

        video_tag = eval_xpath_getindex(
            result, './/div[contains(@class, "video-snippet") and @data-macro="video"]', 0, default=None
        )
        if video_tag is not None:

            # In my tests a video tag in the WEB search was most often not a
            # video, except the ones from youtube ..

            iframe_src = _get_iframe_src(url)
            if iframe_src:
                item['iframe_src'] = iframe_src
                item['template'] = 'videos.html'
                item['thumbnail'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
            else:
                item['img_src'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')

        result_list.append(item)

    return result_list
def _get_iframe_src(url):
    """Return the ``youtube-nocookie.com`` embed URL for a ``/watch?v=<id>`` URL.

    ``None`` is returned when the path of ``url`` is not ``/watch`` or when the
    query string has no (non empty) ``v`` argument.
    """
    parts = urlparse(url)
    if parts.path != '/watch' or not parts.query:
        return None

    video_ids = parse_qs(parts.query).get('v', [])  # type: ignore
    if not video_ids:
        return None
    return 'https://www.youtube-nocookie.com/embed/' + video_ids[0]  # type: ignore
def _parse_news(json_resp):
    """Convert the ``results`` of Brave's news response into result dicts.

    ``img_src`` is only set when the result has a thumbnail.

    :param json_resp: mapping with a ``results`` list
    :return: list of result dicts
    """
    result_list = []

    for result in json_resp["results"]:
        item = {
            'url': result['url'],
            'title': result['title'],
            'content': result['description'],
        }
        # A JS/JSON ``null`` is parsed to None, comparing with the string
        # "null" alone lets None pass and ``None['src']`` raises a TypeError.
        thumbnail = result.get('thumbnail')
        if thumbnail is not None and thumbnail != "null":
            item['img_src'] = thumbnail['src']
        result_list.append(item)

    return result_list
def _parse_images(json_resp):
    """Convert the ``results`` of Brave's image response into ``images.html``
    result dicts.

    :param json_resp: mapping with a ``results`` list
    :return: list of result dicts
    """
    return [
        {
            'url': entry['url'],
            'title': entry['title'],
            'content': entry['description'],
            'template': 'images.html',
            'img_format': entry['properties']['format'],
            'source': entry['source'],
            'img_src': entry['properties']['url'],
        }
        for entry in json_resp["results"]
    ]
def _parse_videos(json_resp):
    """Convert the ``results`` of Brave's video response into ``videos.html``
    result dicts.

    ``thumbnail`` is only set when the result has a thumbnail and
    ``iframe_src`` only when an embed URL can be derived from the result URL.

    :param json_resp: mapping with a ``results`` list
    :return: list of result dicts
    """
    result_list = []

    for result in json_resp["results"]:

        url = result['url']
        item = {
            'url': url,
            'title': result['title'],
            'content': result['description'],
            'template': 'videos.html',
            'length': result['video']['duration'],
            'duration': result['video']['duration'],
        }

        # A JS/JSON ``null`` is parsed to None, comparing with the string
        # "null" alone lets None pass and ``None['src']`` raises a TypeError.
        thumbnail = result.get('thumbnail')
        if thumbnail is not None and thumbnail != "null":
            item['thumbnail'] = thumbnail['src']

        iframe_src = _get_iframe_src(url)
        if iframe_src:
            item['iframe_src'] = iframe_src

        result_list.append(item)

    return result_list
def fetch_traits(engine_traits: EngineTraits):
    """Fetch :ref:`languages <brave languages>` and :ref:`regions <brave
    regions>` from Brave.

    Both are read from Brave's settings page: the UI languages are stored in
    ``engine_traits.custom["ui_lang"]`` and the regions in
    ``engine_traits.regions``.  Conflicting mappings are reported by a
    ``CONFLICT`` message, the first mapping wins.
    """

    # pylint: disable=import-outside-toplevel

    import babel.languages
    from searx.locales import region_tag, language_tag
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    ui_languages = {}
    engine_traits.custom["ui_lang"] = ui_languages

    babel_alias = {'no': 'nb'}  # norway

    # languages (UI)

    resp = get('https://search.brave.com/settings', headers={'Accept-Encoding': 'gzip, deflate'})

    if not resp.ok:  # type: ignore
        print("ERROR: response from Brave is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    for option in dom.xpath('//div[@id="language-select"]//option'):

        ui_lang = option.get('value')
        try:
            # tags with a territory are mapped to a region tag, all others
            # to a language tag
            if '-' in ui_lang:
                sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
            else:
                sxng_tag = language_tag(babel.Locale.parse(ui_lang))

        except babel.UnknownLocaleError:
            print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
            continue

        known = ui_languages.get(sxng_tag)
        if not known:
            ui_languages[sxng_tag] = ui_lang
        elif known != ui_lang:
            print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, known, ui_lang))

    # search regions of brave

    engine_traits.all_locale = 'all'
    regions = engine_traits.regions

    for country in dom.xpath('//div[@id="sidebar"]//ul/li/div[contains(@class, "country")]'):

        flag = country.xpath('./span[contains(@class, "flag")]')[0]
        # country_name = extract_text(flag.xpath('./following-sibling::*')[0])
        flag_class = flag.xpath('./@class')[0]
        country_tag = re.search(r'flag-([^\s]*)\s', flag_class).group(1)  # type: ignore

        # add official languages of the country ..
        for official_lang in babel.languages.get_official_languages(country_tag, de_facto=True):
            babel_lang = babel_alias.get(official_lang, official_lang)
            sxng_tag = region_tag(babel.Locale.parse('%s_%s' % (babel_lang, country_tag.upper())))
            # print("%-20s: %s <-- %s" % (country_name, country_tag, sxng_tag))

            known = regions.get(sxng_tag)
            if not known:
                regions[sxng_tag] = country_tag
            elif known != country_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, known, country_tag))
+124
View File
@@ -0,0 +1,124 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""BT4G_ (bt4g.com) is not a tracker and doesn't store any content and only
collects torrent metadata (such as file names and file sizes) and a magnet link
(torrent identifier).
This engine does not parse the HTML page because there is an API in XML (RSS).
The RSS feed provides fewer data like amount of seeders/leechers and the files
in the torrent file. It's a tradeoff for a "stable" engine as the XML from RSS
content will change way less than the HTML page.
.. _BT4G: https://bt4g.com/
Configuration
=============
The engine has the following additional settings:
- :py:obj:`bt4g_order_by`
- :py:obj:`bt4g_category`
With these options a SearXNG maintainer is able to configure **additional**
engines for specific torrent searches. For example an engine to search only for
Movies and sort the result list by the count of seeders.
.. code:: yaml
- name: bt4g.movie
engine: bt4g
shortcut: bt4gv
categories: video
bt4g_order_by: seeders
bt4g_category: 'movie'
Implementations
===============
"""
import re
from datetime import datetime
from urllib.parse import quote
from lxml import etree
from searx.utils import get_torrent_size
# about
about = {
"website": 'https://bt4gprx.com',
"use_official_api": False,
"require_api_key": False,
"results": 'XML',
}
# engine dependent config
categories = ['files']
paging = True
time_range_support = True
# search-url
url = 'https://bt4gprx.com'
search_url = url + '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss'
bt4g_order_by = 'relevance'
"""Result list can be ordered by ``relevance`` (default), ``size``, ``seeders``
or ``time``.
.. hint::
When *time_range* is active, the results are always ordered by ``time``.
"""
bt4g_category = 'all'
"""BT4G offers categories: ``all`` (default), ``audio``, ``movie``, ``doc``,
``app`` and ``other``.
"""
def request(query, params):
    """Assemble the URL of BT4G's RSS search and store it in ``params['url']``.

    With a time range selected the results are ordered by ``time``, otherwise
    by ``bt4g_order_by``.  Returns the (modified) ``params``.
    """
    order_by = 'time' if params['time_range'] else bt4g_order_by

    params['url'] = search_url.format(
        search_term=quote(query),
        order_by=order_by,
        category=bt4g_category,
        pageno=params['pageno'],
    )
    return params
def response(resp):
    """Parse BT4G's RSS feed into a list of ``torrent.html`` results.

    The file size is taken from the second ``<br>`` separated part of the
    item's description.  Seeders and leechers are reported as ``N/A``.

    :param resp: HTTP response, only ``resp.content`` is read
    :return: list of result dicts
    """
    feed = etree.XML(resp.content)

    # return empty array if nothing is found
    if len(feed) < 1:
        return []

    def _text(node, tag):
        return node.find(tag).text

    results = []
    for item in feed.xpath('./channel/item'):
        title = _text(item, "title")
        guid = _text(item, "guid")
        description_parts = _text(item, "description").split('<br>')
        # split the size into number and unit
        size_parts = re.split(r"([A-Z]+)", description_parts[1])
        magnet = _text(item, "link")
        published = _text(item, "pubDate")

        results.append(
            {
                'url': guid,
                'title': title,
                'magnetlink': magnet,
                'seed': 'N/A',
                'leech': 'N/A',
                'filesize': get_torrent_size(size_parts[0], size_parts[1]),
                'publishedDate': datetime.strptime(published, '%a,%d %b %Y %H:%M:%S %z'),
                'template': 'torrent.html',
            }
        )

    return results
+89
View File
@@ -0,0 +1,89 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
BTDigg (Videos, Music, Files)
"""
from lxml import html
from urllib.parse import quote, urljoin
from searx.utils import extract_text, get_torrent_size
# about
about = {
"website": 'https://btdig.com',
"wikidata_id": 'Q4836698',
"official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'},
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['files']
paging = True
# search-url
url = 'https://btdig.com'
search_url = url + '/search?q={search_term}&p={pageno}'
# do search-request
def request(query, params):
    """Store the BTDigg search URL in ``params['url']`` and return ``params``.

    The page number sent to BTDigg is ``params['pageno'] - 1``.
    """
    search_term = quote(query)
    page_index = params['pageno'] - 1
    params['url'] = search_url.format(search_term=search_term, pageno=page_index)

    return params
# get response from search-request
def response(resp):
    """Parse BTDigg's HTML result page into a list of ``torrent.html`` results.

    :param resp: HTTP response, only ``resp.text`` is read
    :return: list of result dicts in the order of the page
    """
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@class="one_result"]')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//div[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)

        excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
        content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
        # it is better to emit <br/> instead of |, but html tags are verboten
        content = content.strip().replace('\n', ' | ')
        content = ' '.join(content.split())

        # size text is "<number> <unit>": evaluate the XPath only once
        size_parts = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()
        files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]

        # convert filesize to byte if possible
        filesize = get_torrent_size(size_parts[0], size_parts[1])

        # convert files to int if possible; only a failed conversion is
        # tolerated, a bare ``except`` would also hide unrelated errors
        try:
            files = int(files)
        except (TypeError, ValueError):
            files = None

        magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href']

        # append result
        results.append(
            {
                'url': href,
                'title': title,
                'content': content,
                'filesize': filesize,
                'files': files,
                'magnetlink': magnetlink,
                'template': 'torrent.html',
            }
        )

    # results are returned in the order of the result page
    return results
+243
View File
@@ -0,0 +1,243 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""With *command engines* administrators can run engines to integrate arbitrary
shell commands.
.. attention::
When creating and enabling a ``command`` engine on a public instance, you
must be careful to avoid leaking private data.
The easiest solution is to limit the access by setting ``tokens`` as described
in section :ref:`private engines`. The engine base is flexible. Only your
imagination can limit the power of this engine (and maybe security concerns).
Configuration
=============
The following options are available:
``command``:
A comma separated list of the elements of the command. A special token
``{{QUERY}}`` tells where to put the search terms of the user. Example:
.. code:: yaml
['ls', '-l', '-h', '{{QUERY}}']
``delimiter``:
A mapping containing a delimiter ``chars`` and the *titles* of each element in
``keys``.
``parse_regex``:
A dict containing the regular expressions for each result key.
``query_type``:
The expected type of user search terms. Possible values: ``path`` and
``enum``.
``path``:
Checks if the user provided path is inside the working directory. If not,
the query is not executed.
``enum``:
Is a list of allowed search terms. If the user submits something which is
not included in the list, the query returns an error.
``query_enum``:
A list containing allowed search terms if ``query_type`` is set to ``enum``.
``working_dir``:
The directory where the command has to be executed. Default: ``./``.
``result_separator``:
The character that separates results. Default: ``\\n``.
Example
=======
The example engine below can be used to find files with a specific name in the
configured working directory:
.. code:: yaml
- name: find
engine: command
command: ['find', '.', '-name', '{{QUERY}}']
query_type: path
shortcut: fnd
delimiter:
chars: ' '
keys: ['line']
Implementations
===============
"""
import re
from os.path import expanduser, isabs, realpath, commonprefix
from shlex import split as shlex_split
from subprocess import Popen, PIPE
from threading import Thread
from searx import logger
# Module level settings of the command engine.  ``init()`` overwrites command,
# working_dir, delimiter, parse_regex and environment_variables with the
# values from the engine settings.
engine_type = 'offline'
paging = True
command = []  # elements of the command, '{{QUERY}}' is replaced by the search terms
delimiter = {}  # delimiter based parsing: mapping with the keys 'chars' and 'keys'
parse_regex = {}  # regex based parsing: result key --> regular expression
query_type = ''  # '' (no check), 'path' or 'enum'
query_enum = []  # allowed search terms if query_type is 'enum'
environment_variables = {}  # passed as ``env`` to the started process
working_dir = realpath('.')
result_separator = '\n'
result_template = 'key-value.html'
timeout = 4.0  # used to join the reader thread and to wait for the process

_command_logger = logger.getChild('command')
_compiled_parse_regex = {}  # result key --> compiled regex, filled by init()
def init(engine_settings):
    """Initialize the module settings from ``engine_settings``.

    The parsing options are validated first.  A relative ``working_dir`` is
    converted by ``realpath`` and the expressions of ``parse_regex`` are
    compiled (``re.MULTILINE``) into ``_compiled_parse_regex``.

    :raises ValueError: if the parsing options are invalid or ``command`` is
        missing
    """
    global command, working_dir, delimiter, parse_regex, environment_variables

    check_parsing_options(engine_settings)

    if 'command' not in engine_settings:
        raise ValueError('engine command : missing configuration key: command')
    command = engine_settings['command']

    if 'working_dir' in engine_settings:
        configured_dir = engine_settings['working_dir']
        working_dir = configured_dir if isabs(configured_dir) else realpath(configured_dir)

    if 'parse_regex' in engine_settings:
        parse_regex = engine_settings['parse_regex']
        for key, pattern in parse_regex.items():
            _compiled_parse_regex[key] = re.compile(pattern, flags=re.MULTILINE)

    if 'delimiter' in engine_settings:
        delimiter = engine_settings['delimiter']

    if 'environment_variables' in engine_settings:
        environment_variables = engine_settings['environment_variables']
def search(query, params):
    """Run the configured command and return the results of the requested page.

    The command output is read in a separate thread which is joined for at
    most ``timeout`` seconds; the results collected until then are returned.
    """
    argv = _get_command_to_run(query)
    if not argv:
        return []

    collected = []
    reader = Thread(
        target=_get_results_from_process,
        args=(collected, argv, params['pageno']),
    )
    reader.start()
    reader.join(timeout=timeout)

    return collected
def _get_command_to_run(query):
    """Build the argument list of the command for the user's ``query``.

    The query is split like a shell command line and checked against the
    configured ``query_type``.  Every ``{{QUERY}}`` token of ``command`` is
    replaced by the arguments of the query.
    """
    user_args = shlex_split(query)
    __check_query_params(user_args)

    argv = []
    for token in command:
        argv += user_args if token == '{{QUERY}}' else [token]
    return argv
def _iter_raw_results(stdout):
    """Yield the raw records read from ``stdout``, split at ``result_separator``."""
    leftover = ''
    for line in iter(stdout.readline, b''):
        # Everything in front of the last separator is complete.  The rest
        # (an empty string if the buffer ends with the separator) is carried
        # over and prepended to the next line.
        *complete, leftover = (leftover + line.decode('utf-8')).split(result_separator)
        yield from complete

    # output that does not end with a separator: last record
    if leftover:
        yield leftover


def _get_results_from_process(results, cmd, pageno):
    """Run ``cmd`` and append the parsed results of page ``pageno`` to ``results``.

    Records which can not be parsed are skipped (and do not count for the
    paging).  Reading stops as soon as the requested page is complete.

    :param results: list the results are appended to (shared with the caller)
    :param cmd: argument list of the command to run
    :param pageno: requested page
    :return: ``results``
    :raises RuntimeError: if the command exits with a non-zero return code
    """
    count = 0
    start, end = __get_results_limits(pageno)

    with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
        for raw_result in _iter_raw_results(process.stdout):
            result = __parse_single_result(raw_result)
            # the parser signals a failure by an empty dict
            if not result:
                _command_logger.debug('skipped result: %s', raw_result)
                continue

            if start <= count <= end:
                result['template'] = result_template
                results.append(result)

            count += 1
            if end < count:
                return results

        return_code = process.wait(timeout=timeout)
        if return_code != 0:
            raise RuntimeError('non-zero return code when running command', cmd, return_code)

    return results
def __get_results_limits(pageno):
    """Return the (inclusive) indexes of the first and the last result of page
    ``pageno``; a page holds ten results."""
    first = 10 * (pageno - 1)
    return first, first + 9
def __check_query_params(params):
    """Check the arguments of the user's query against ``query_type``.

    - no ``query_type``: nothing is checked
    - ``path``: the last argument must be a path inside of ``working_dir``
    - ``enum``: every argument must be listed in ``query_enum`` (if the list
      is not empty)

    :param params: list of the arguments of the query
    :raises ValueError: if the check fails
    """
    # the module header imports ``commonprefix`` only
    from os.path import commonpath  # pylint: disable=import-outside-toplevel

    if not query_type:
        return

    if query_type == 'path':
        query_path = realpath(expanduser(params[-1]))
        base_dir = realpath(working_dir)
        # ``commonprefix`` compares character by character and would accept
        # e.g. /srv/data-private for the working directory /srv/data;
        # ``commonpath`` compares whole path components.
        try:
            inside = commonpath([query_path, base_dir]) == base_dir
        except ValueError:
            # raised for paths that have nothing in common (different drives)
            inside = False
        if not inside:
            raise ValueError('requested path is outside of configured working directory')

    elif query_type == 'enum' and len(query_enum) > 0:
        for param in params:
            if param not in query_enum:
                raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
def check_parsing_options(engine_settings):
    """Checks if delimiter based parsing or regex parsing is configured correctly

    Exactly one of ``delimiter`` and ``parse_regex`` has to be configured and
    a ``delimiter`` needs the keys ``chars`` and ``keys``.

    :raises ValueError: if the settings are invalid
    """
    has_delimiter = 'delimiter' in engine_settings
    has_regex = 'parse_regex' in engine_settings

    if not has_delimiter and not has_regex:
        raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
    if has_delimiter and has_regex:
        raise ValueError('failed to init settings for parsing lines: too many settings')

    if has_delimiter:
        if 'chars' not in engine_settings['delimiter'] or 'keys' not in engine_settings['delimiter']:
            # tell the maintainer what is wrong instead of raising a bare ValueError
            raise ValueError('failed to init settings for parsing lines: delimiter needs chars and keys')
def __parse_single_result(raw_result):
    """Parses command line output based on configuration

    With a ``delimiter`` the record is split into as many elements as there
    are ``keys``; with ``parse_regex`` the match of each expression is stored
    under its result key.  An empty dict is returned if the record does not
    fit.
    """
    parsed = {}

    if delimiter:
        keys = delimiter['keys']
        parts = raw_result.split(delimiter['chars'], maxsplit=len(keys) - 1)
        if len(parts) != len(keys):
            return {}
        parsed.update(zip(keys, parts))

    if parse_regex:
        for key, pattern in _compiled_parse_regex.items():
            match = pattern.search(raw_result)
            if match is None:
                return {}
            parsed[key] = match.group(0)

    return parsed
+116
View File
@@ -0,0 +1,116 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CORE (science)
"""
from datetime import datetime
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException
about = {
"website": 'https://core.ac.uk',
"wikidata_id": 'Q22661180',
"official_api_documentation": 'https://core.ac.uk/documentation/api/',
"use_official_api": True,
"require_api_key": True,
"results": 'JSON',
}
categories = ['science', 'scientific publications']
paging = True
nb_per_page = 10
api_key = 'unset'
base_url = 'https://core.ac.uk:443/api-v2/search/'
search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
def request(query, params):
    """Store the URL of the CORE search in ``params['url']`` and return
    ``params``.

    :raises SearxEngineAPIException: if no API key is configured
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    fields = {
        'query': urlencode({'q': query}),
        'nb_per_page': nb_per_page,
        'page': params['pageno'],
        'apikey': api_key,
    }
    params['url'] = base_url + search_string.format(**fields)
    return params
def response(resp):
    """Convert a CORE API JSON response into a list of ``paper.html`` results.

    Records for which no URL can be determined (no ``urls``, ``doi``,
    ``downloadUrl`` or ARK identifier) are skipped.

    :param resp: response object providing a ``json()`` method
    :return: list of result dicts
    """
    results = []
    json_data = resp.json()

    for result in json_data['data']:
        source = result['_source']

        # pick the first usable URL, in order of preference
        url = None
        if source.get('urls'):
            url = source['urls'][0].replace('http://', 'https://', 1)

        if url is None and source.get('doi'):
            # use the DOI reference
            url = 'https://doi.org/' + source['doi']

        if url is None and source.get('downloadUrl'):
            # use the downloadUrl
            url = source['downloadUrl']

        if url is None and source.get('identifiers'):
            # try to find an ark id, see
            # https://www.wikidata.org/wiki/Property:P8091
            # and https://en.wikipedia.org/wiki/Archival_Resource_Key
            arkids = [
                identifier[5:]  # 5 is the length of "ark:/"
                for identifier in source.get('identifiers')
                if isinstance(identifier, str) and identifier.startswith('ark:/')
            ]
            if arkids:
                url = 'https://n2t.net/' + arkids[0]

        if url is None:
            continue

        published_date = None
        timestamp = source['publishedDate'] or source['depositedDate']
        if timestamp:
            # the value is divided by 1000 before conversion (milliseconds)
            published_date = datetime.fromtimestamp(timestamp / 1000)

        # sometimes the 'title' is None / filter None values
        journals = [j['title'] for j in (source.get('journals') or []) if j['title']]

        publisher = source['publisher']
        if publisher:
            publisher = publisher.strip("'")

        # 'repositoryDocument' can be present with a null value, in which case
        # a plain ``.get(key, {})`` would hand back None instead of a dict
        repository_document = source.get('repositoryDocument') or {}

        results.append(
            {
                'template': 'paper.html',
                'title': source['title'],
                'url': url,
                'content': source['description'] or '',
                # 'comments': '',
                'tags': source['topics'],
                'publishedDate': published_date,
                'type': (source['types'] or [None])[0],
                'authors': source['authors'],
                'editor': ', '.join(source['contributors'] or []),
                'publisher': publisher,
                'journal': ', '.join(journals),
                # 'volume': '',
                # 'pages' : '',
                # 'number': '',
                'doi': source['doi'],
                'issn': [x for x in [source.get('issn')] if x],
                'isbn': [x for x in [source.get('isbn')] if x],  # exists in the rawRecordXml
                'pdf_url': repository_document.get('pdfOrigin'),
            }
        )

    return results
+60
View File
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CrossRef (Science)
"""
# pylint: disable=use-dict-literal
from urllib.parse import urlencode
from searx.utils import html_to_text
about = {
"website": 'https://www.crossref.org/',
"wikidata_id": 'Q5188229',
"official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'
def request(query, params):
    """Store the CrossRef ``works`` search URL in ``params['url']``.

    The result offset is derived from the page number in steps of 20.
    """
    offset = 20 * (params['pageno'] - 1)
    query_string = urlencode({'query': query, 'offset': offset})
    params['url'] = search_url + '?' + query_string
    return params
def response(resp):
    """Convert a CrossRef JSON response into a list of ``paper.html`` results.

    For records of type ``book-chapter`` the container (book) title is used
    as title, with the chapter title appended in parentheses when it differs.
    For all other records the first container title is reported as journal.

    :param resp: response object providing a ``json()`` method
    :return: list of result dicts
    """
    res = resp.json()
    results = []

    for record in res['message']['items']:
        record_type = record['type']

        if record_type == 'book-chapter':
            title = record['container-title'][0]
            if record['title'][0].lower().strip() != title.lower().strip():
                title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
            journal = None
        else:
            title = html_to_text(record['title'][0])
            # 'container-title' may be missing *or* an empty list; indexing an
            # empty list would raise IndexError
            journal = (record.get('container-title') or [None])[0]

        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record_type,
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )

    return results
+56
View File
@@ -0,0 +1,56 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Currency convert (DuckDuckGo)
"""
import json
# about
about = {
"website": 'https://duckduckgo.com/',
"wikidata_id": 'Q12805',
"official_api_documentation": 'https://duckduckgo.com/api',
"use_official_api": False,
"require_api_key": False,
"results": 'JSONP',
"description": "Service from DuckDuckGo.",
}
engine_type = 'online_currency'
categories = []
base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
weight = 100
https_support = True
def request(_query, params):
    """Store the DuckDuckGo currency *spice* URL in ``params['url']``.

    The query text is not used; the currency pair is read from
    ``params['from']`` and ``params['to']``.
    """
    source_currency = params['from']
    target_currency = params['to']
    params['url'] = base_url.format(source_currency, target_currency)
    return params
def response(resp):
    """Build the currency conversion answer from DuckDuckGo's JSONP response.

    The first and the last line of the response body are removed to get only
    the JSON payload.

    :param resp: response object with ``text`` and ``search_params``
    :return: a list with one answer dict, or an empty list when the payload
        cannot be parsed or carries no conversion rate
    """
    json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
    results = []

    try:
        conversion_rate = float(json.loads(json_resp)['conversion']['converted-amount'])
    except (ValueError, KeyError, TypeError):
        # ValueError: invalid JSON or non-numeric amount
        # KeyError / TypeError: valid JSON without the expected structure
        return results

    search_params = resp.search_params
    answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
        search_params['amount'],
        search_params['from'],
        search_params['amount'] * conversion_rate,
        search_params['to'],
        conversion_rate,
        search_params['from_name'],
        search_params['to_name'],
    )

    url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'.format(
        search_params['from'].upper(), search_params['to']
    )

    results.append({'answer': answer, 'url': url})
    return results
+252
View File
@@ -0,0 +1,252 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Dailymotion (Videos)
~~~~~~~~~~~~~~~~~~~~
.. _REST GET: https://developers.dailymotion.com/tools/
.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
"""
from typing import TYPE_CHECKING
from datetime import datetime, timedelta
from urllib.parse import urlencode
import time
import babel
from searx.network import get, raise_for_httperror # see https://github.com/searxng/searxng/issues/762
from searx.utils import html_to_text
from searx.exceptions import SearxEngineAPIException
from searx.locales import region_tag, language_tag
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about
about = {
"website": 'https://www.dailymotion.com',
"wikidata_id": 'Q769222',
"official_api_documentation": 'https://www.dailymotion.com/developer',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['videos']
paging = True
number_of_results = 10
time_range_support = True
time_delta_dict = {
"day": timedelta(days=1),
"week": timedelta(days=7),
"month": timedelta(days=31),
"year": timedelta(days=365),
}
safesearch = True
safesearch_params = {
2: {'is_created_for_kids': 'true'},
1: {'is_created_for_kids': 'true'},
0: {},
}
"""True if this video is "Created for Kids" / intends to target an audience
under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
"""
family_filter_map = {
2: 'true',
1: 'true',
0: 'false',
}
"""By default, the family filter is turned on. Setting this parameter to
``false`` will stop filtering-out explicit content from searches and global
contexts (``family_filter`` in `Global API Parameters`_ ).
"""
result_fields = [
'allow_embed',
'description',
'title',
'created_time',
'duration',
'url',
'thumbnail_360_url',
'id',
]
"""`Fields selection`_, by default, a few fields are returned. To request more
specific fields, the ``fields`` parameter is used with the list of fields
SearXNG needs in the response to build a video result list.
"""
search_url = 'https://api.dailymotion.com/videos?'
"""URL to retrieve a list of videos.
- `REST GET`_
- `Global API Parameters`_
- `Video filters API`_
"""
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
"""URL template to embed video in SearXNG's result list."""
def request(query, params):
    """Assemble the Dailymotion ``/videos`` request URL.

    Returns ``False`` for an empty query, otherwise ``params`` with the
    ``url`` entry set.
    """
    if not query:
        return False

    sxng_locale = params['searxng_locale']
    eng_region: str = traits.get_region(sxng_locale, 'en_US')  # type: ignore
    eng_lang = traits.get_language(sxng_locale, 'en')

    args = {
        'search': query,
        'family_filter': family_filter_map.get(params['safesearch'], 'false'),
        'thumbnail_ratio': 'original',  # original|widescreen|square
        # https://developers.dailymotion.com/api/#video-filters
        'languages': eng_lang,
        'page': params['pageno'],
        'password_protected': 'false',
        'private': 'false',
        'sort': 'relevance',
        'limit': number_of_results,
        'fields': ','.join(result_fields),
    }
    args.update(safesearch_params.get(params['safesearch'], {}))

    # Localization and country arguments are only added when the locale
    # carries a region (e.g. ``de-DE``); a bare language selection (``:de``,
    # ``:en``, ..) gets none.
    if '-' in sxng_locale:
        # https://developers.dailymotion.com/api/#global-parameters
        args['localization'] = eng_region
        args['country'] = eng_region.split('_')[1]
        # Insufficient rights for the `ams_country' parameter of route `GET /videos'
        # 'ams_country': eng_region.split('_')[1],

    time_delta = time_delta_dict.get(params["time_range"])
    if time_delta:
        args['created_after'] = (datetime.now() - time_delta).timestamp()

    params['url'] = search_url + urlencode(args)
    return params
# get response from search-request
def response(resp):
    """Convert Dailymotion's JSON response into a list of video results.

    :param resp: response object providing a ``json()`` method
    :raises SearxEngineAPIException: when the JSON body contains an ``error``
    :return: list of ``videos.html`` result dicts
    """
    results = []
    search_res = resp.json()

    # check for an API error
    if 'error' in search_res:
        raise SearxEngineAPIException(search_res['error'].get('message'))

    raise_for_httperror(resp)

    # parse results
    for res in search_res.get('list', []):
        title = res['title']
        url = res['url']

        content = html_to_text(res['description'])
        if len(content) > 300:
            content = content[:300] + '...'

        published_date = datetime.fromtimestamp(res['created_time'], None)

        # time.gmtime(None) returns the *current* time, which would be shown
        # as a bogus video length; report no length when it is missing.
        # NOTE(review): assumes the result template tolerates ``length=None``.
        duration = res.get('duration')
        if duration is None:
            length = None
        else:
            duration_struct = time.gmtime(duration)
            length_format = "%H:%M:%S" if duration_struct.tm_hour else "%M:%S"
            length = time.strftime(length_format, duration_struct)

        thumbnail = res['thumbnail_360_url'].replace("http://", "https://")

        item = {
            'template': 'videos.html',
            'url': url,
            'title': title,
            'content': content,
            'publishedDate': published_date,
            'length': length,
            'thumbnail': thumbnail,
        }

        # HINT: no matter what the value is, without an API token videos
        # can't be shown embedded
        if res['allow_embed']:
            item['iframe_src'] = iframe_src.format(video_id=res['id'])

        results.append(item)

    # return results
    return results
def fetch_traits(engine_traits: EngineTraits):
    """Fetch locales & languages from dailymotion.

    Locales are fetched from `api/locales <https://api.dailymotion.com/locales>`_.
    Dailymotion returns some duplicated locale codes which are ignored::

      en_EN --> en_GB, en_US
      ar_AA --> ar_EG, ar_AE, ar_SA

    The language list `api/languages <https://api.dailymotion.com/languages>`_
    contains over 7000 *languages* codes (see PR1071_).  Only the language
    codes that are used in the locales are taken over.

    .. _PR1071: https://github.com/searxng/searxng/pull/1071

    """
    ignored_locales = ('en_EN', 'ar_AA')

    locales_resp = get('https://api.dailymotion.com/locales')
    if not locales_resp.ok:  # type: ignore
        print("ERROR: response from dailymotion/locales is not OK.")

    for locale_item in locales_resp.json()['list']:  # type: ignore
        eng_tag = locale_item['locale']
        if eng_tag in ignored_locales:
            continue

        try:
            sxng_tag = region_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: item unknown --> %s" % locale_item)
            continue

        known_tag = engine_traits.regions.get(sxng_tag)
        if known_tag:
            # the first mapping wins; report only real conflicts
            if known_tag != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, known_tag, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag

    # language part of every region tag collected above
    locale_languages = {region.split('_')[0] for region in engine_traits.regions.values()}

    languages_resp = get('https://api.dailymotion.com/languages')
    if not languages_resp.ok:  # type: ignore
        print("ERROR: response from dailymotion/languages is not OK.")

    for language_item in languages_resp.json()['list']:  # type: ignore
        eng_tag = language_item['code']
        if eng_tag not in locale_languages:
            continue
        sxng_tag = language_tag(babel.Locale.parse(eng_tag))
        engine_traits.languages[sxng_tag] = eng_tag
+62
View File
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Deepl translation engine"""
from json import loads
about = {
"website": 'https://deepl.com',
"wikidata_id": 'Q43968444',
"official_api_documentation": 'https://www.deepl.com/docs-api',
"use_official_api": True,
"require_api_key": True,
"results": 'JSON',
}
engine_type = 'online_dictionary'
categories = ['general']
url = 'https://api-free.deepl.com/v2/translate'
api_key = None
def request(_query, params):
    '''pre-request callback

    Turns ``params`` into a POST request against the DeepL translate
    endpoint.  The text to translate is taken from ``params['query']`` and
    the target language from ``params['to_lang'][1]``.

    params<dict>:

    - ``method`` : POST/GET
    - ``headers``: {}
    - ``data``: {} # if method == POST
    - ``url``: ''
    - ``category``: 'search category'
    - ``pageno``: 1 # number of the requested page
    '''
    form = {
        'auth_key': api_key,
        'text': params['query'],
        'target_lang': params['to_lang'][1],
    }

    params['url'] = url
    params['method'] = 'POST'
    params['data'] = form
    return params
def response(resp):
    """Render the translations of a DeepL response as one infobox result.

    :param resp: response object whose ``text`` is the JSON body
    :return: list with a single infobox dict; its ``content`` is a ``<dl>``
        with one ``<dd>`` per translation
    """
    translations = loads(resp.text)['translations']

    entries = ''.join(f"<dd>{translation['text']}</dd>" for translation in translations)
    infobox = "<dl>" + entries + "</dl>"

    return [
        {
            'infobox': 'Deepl',
            'content': infobox,
        }
    ]
+60
View File
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Deezer (Music)
"""
from json import loads
from urllib.parse import urlencode
# about
about = {
"website": 'https://deezer.com',
"wikidata_id": 'Q602243',
"official_api_documentation": 'https://developers.deezer.com/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# engine dependent config
categories = ['music']
paging = True
# search-url
url = 'https://api.deezer.com/'
search_url = url + 'search?{query}&index={offset}'
iframe_src = "https://www.deezer.com/plugins/player?type=tracks&id={audioid}"
# do search-request
def request(query, params):
    """Store the Deezer search URL in ``params['url']``.

    The result index advances by 25 per page.
    """
    page_index = params['pageno'] - 1
    params['url'] = search_url.format(query=urlencode({'q': query}), offset=page_index * 25)
    return params
# get response from search-request
def response(resp):
    """Convert Deezer's JSON response into a list of track results.

    Only entries of type ``track`` are reported; ``http://`` links are
    rewritten to ``https://``.
    """
    results = []

    for result in loads(resp.text).get('data', []):
        if result['type'] != 'track':
            continue

        title = result['title']
        link = result['link']
        if link.startswith('http://'):
            link = 'https' + link[4:]

        content = '{} - {} - {}'.format(result['artist']['name'], result['album']['title'], result['title'])

        results.append(
            {
                'url': link,
                'title': title,
                'iframe_src': iframe_src.format(audioid=result['id']),
                'content': content,
            }
        )

    return results
+73
View File
@@ -0,0 +1,73 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Within this module we implement a *demo offline engine*. Do not look too
closely at the implementation, it's just a simple example. To make use of this
*demo* engine add the following entry to your engines list in ``settings.yml``:
.. code:: yaml
- name: my offline engine
engine: demo_offline
shortcut: demo
disabled: false
"""
import json
engine_type = 'offline'
categories = ['general']
disabled = True
timeout = 2.0
about = {
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'JSON',
}
# if there is a need for globals, use a leading underline
_my_offline_engine = None
def init(engine_settings=None):
    """Initialization of the (offline) engine.  The origin of this demo engine is a
    simple json string which is loaded in this example while the engine is
    initialized.

    :param engine_settings: engine configuration; its ``name`` entry becomes
        the value of the first demo item.  ``None`` is treated like an empty
        configuration.
    """
    global _my_offline_engine  # pylint: disable=global-statement

    # the signature allows ``None``, so don't call ``.get`` on it blindly
    name = (engine_settings or {}).get('name')

    # json.dumps() keeps the document valid even when the name contains quotes
    # or backslashes, which string interpolation would not
    _my_offline_engine = json.dumps(
        [
            {"value": str(name)},
            {"value": "first item"},
            {"value": "second item"},
            {"value": "third item"},
        ]
    )
def search(query, request_params):
    """Query the (offline) engine and return the list of results.

    This demo does not filter by ``query``; a real engine would pass the term
    on to its local data source.  Every row of the JSON document prepared by
    the init function becomes one ``key-value.html`` result.
    """
    rows = json.loads(_my_offline_engine)
    return [
        {
            'query': query,
            'language': request_params['searxng_locale'],
            'value': row.get("value"),
            # choose a result template or comment out to use the *default*
            'template': 'key-value.html',
        }
        for row in rows
    ]
+100
View File
@@ -0,0 +1,100 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Within this module we implement a *demo online engine*. Do not look too
closely at the implementation, it's just a simple example which queries `The Art
Institute of Chicago <https://www.artic.edu>`_
To get in use of this *demo* engine add the following entry to your engines
list in ``settings.yml``:
.. code:: yaml
- name: my online engine
engine: demo_online
shortcut: demo
disabled: false
"""
from json import loads
from urllib.parse import urlencode
engine_type = 'online'
send_accept_language_header = True
categories = ['general']
disabled = True
timeout = 2.0
categories = ['images']
paging = True
page_size = 20
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
image_api = 'https://www.artic.edu/iiif/2/'
about = {
"website": 'https://www.artic.edu',
"wikidata_id": 'Q239303',
"official_api_documentation": 'http://api.artic.edu/docs/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
# if there is a need for globals, use a leading underline
_my_online_engine = None
def init(engine_settings):
    """Initialization of the (online) engine.

    Remembers the configured engine ``name`` in a module global.  Drop this
    function if your engine needs no initialization.
    """
    global _my_online_engine  # pylint: disable=global-statement
    configured_name = engine_settings.get('name')
    _my_online_engine = configured_name
def request(query, params):
    """Build up the ``params`` for the online request.

    The URL assembled here searches artworks at
    `artic.edu <https://artic.edu>`__ and asks only for the fields the
    response parser of this module reads.
    """
    search_args = {
        'q': query,
        'page': params['pageno'],
        'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
        'limit': page_size,
    }
    params['url'] = search_api + urlencode(search_args)
    return params
def response(resp):
    """Parse out the result items from the response.

    In this example we parse the response from
    `api.artic.edu <https://artic.edu>`__ and keep only artworks that have an
    image.

    :param resp: response object whose ``text`` is the JSON body
    :return: list of ``images.html`` result dicts
    """
    results = []
    json_data = loads(resp.text)

    # ``image_api`` ends with a slash; strip it so the joined image URL does
    # not contain a double slash
    image_base = image_api.rstrip('/')

    for result in json_data['data']:
        if not result['image_id']:
            continue

        results.append(
            {
                'url': 'https://artic.edu/artworks/%(id)s' % result,
                'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
                'content': result['medium_display'],
                'author': ', '.join(result['artist_titles']),
                'img_src': image_base + '/%(image_id)s/full/843,/0/default.jpg' % result,
                'img_format': result['dimensions'],
                'template': 'images.html',
            }
        )

    return results
+81
View File
@@ -0,0 +1,81 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Deviantart (Images)
"""
from urllib.parse import urlencode
from lxml import html
# about
about = {
"website": 'https://www.deviantart.com/',
"wikidata_id": 'Q46523',
"official_api_documentation": 'https://www.deviantart.com/developers/',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['images']
paging = True
time_range_support = True
time_range_dict = {
'day': 'popular-24-hours',
'week': 'popular-1-week',
'month': 'popular-1-month',
'year': 'most-recent',
}
# search-url
base_url = 'https://www.deviantart.com'
def request(query, params):
    """Store the DeviantArt search URL in ``params['url']``.

    Example: https://www.deviantart.com/search/deviations?page=5&q=foo

    A supported time range is mapped onto DeviantArt's ``order`` argument.
    """
    args = {
        'page': params['pageno'],
        'q': query,
    }

    time_range = params['time_range']
    if time_range in time_range_dict:
        args['order'] = time_range_dict[time_range]

    params['url'] = base_url + '/search/deviations?' + urlencode(args)
    return params
def response(resp):
    """Scrape image results from a DeviantArt search result page.

    Each ``<div>`` of a content row is expected to hold one deviation link.
    The image is taken from the link's ``<noscript>`` fallback when there is
    one, otherwise from the link itself.  Entries without a link or without an
    image are skipped.

    :param resp: response object whose ``text`` is the HTML page
    :return: list of ``images.html`` result dicts
    """
    results = []
    dom = html.fromstring(resp.text)

    for row in dom.xpath('//div[contains(@data-hook, "content_row")]'):
        for result in row.xpath('./div'):
            links = result.xpath('.//a[@data-hook="deviation_link"]')
            if not links:
                # a <div> without a deviation link would otherwise raise
                # IndexError and discard the whole page
                continue
            a_tag = links[0]

            noscript_tag = a_tag.xpath('.//noscript')
            if noscript_tag:
                img_tags = noscript_tag[0].xpath('.//img')
            else:
                img_tags = a_tag.xpath('.//img')
            if not img_tags:
                continue
            img_tag = img_tags[0]

            results.append(
                {
                    'template': 'images.html',
                    'url': a_tag.attrib.get('href'),
                    'img_src': img_tag.attrib.get('src'),
                    'title': img_tag.attrib.get('alt'),
                }
            )

    return results
+60
View File
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dictzone
"""
from urllib.parse import urljoin
from lxml import html
from searx.utils import eval_xpath
# about
about = {
"website": 'https://dictzone.com/',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
engine_type = 'online_dictionary'
categories = ['general']
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
results_xpath = './/table[@id="r"]/tr'
https_support = True
def request(query, params):
    """Store the Dictzone dictionary URL in ``params['url']``.

    The language names and the term to look up are read from ``params``
    (``from_lang``, ``to_lang`` and ``query``); the ``query`` argument itself
    is not used.
    """
    from_lang = params['from_lang'][2]
    to_lang = params['to_lang'][2]
    params['url'] = url.format(from_lang=from_lang, to_lang=to_lang, query=params['query'])
    return params
def response(resp):
    """Scrape translations from a Dictzone result page.

    The first row selected by ``results_xpath`` is skipped.  Every following
    row with exactly two cells yields one result: the first cell is the
    source term, the links in the second cell are its translations.

    :param resp: response object with ``text`` (HTML page) and ``url``
    :return: list of result dicts
    """
    results = []
    dom = html.fromstring(resp.text)

    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
        # rows that are not a (source, translations) pair are ignored; an
        # explicit check replaces the former bare ``except:`` around the
        # tuple unpacking
        cells = eval_xpath(result, './td')
        if len(cells) != 2:
            continue
        from_result, to_results_raw = cells

        to_results = []
        for to_result in eval_xpath(to_results_raw, './p/a'):
            text = to_result.text_content()
            if text.strip():
                to_results.append(text)

        results.append(
            {
                # the row index only serves to make the result URL unique
                'url': urljoin(str(resp.url), '?%d' % k),
                'title': from_result.text_content(),
                'content': '; '.join(to_results),
            }
        )

    return results
+64
View File
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DigBT (Videos, Music, Files)
"""
from urllib.parse import urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size
# about
about = {
"website": 'https://digbt.org',
"wikidata_id": None,
"official_api_documentation": None,
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
categories = ['videos', 'music', 'files']
paging = True
URL = 'https://digbt.org'
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
def request(query, params):
    """Store the DigBT search URL for the requested page in ``params['url']``."""
    page = params['pageno']
    params['url'] = SEARCH_URL.format(query=query, pageno=page)
    return params
def response(resp):
    """Scrape torrent results from a DigBT search result page.

    Every ``<td class="x-item">`` yields one ``torrent.html`` result.  The
    file size is read from fixed positions (``FILESIZE`` and
    ``FILESIZE_MULTIPLIER``) of the whitespace-split "tail" text.
    """
    dom = html.fromstring(resp.text)
    results = []

    for item in dom.xpath('.//td[@class="x-item"]'):
        url = urljoin(URL, item.xpath('.//a[@title]/@href')[0])
        title = extract_text(item.xpath('.//a[@title]'))
        content = extract_text(item.xpath('.//div[@class="files"]'))

        tail_words = extract_text(item.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(tail_words[FILESIZE], tail_words[FILESIZE_MULTIPLIER])

        magnetlink = item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'filesize': filesize,
                'magnetlink': magnetlink,
                'seed': 'N/A',
                'leech': 'N/A',
                'template': 'torrent.html',
            }
        )

    return results
+63
View File
@@ -0,0 +1,63 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Docker Hub (IT)
"""
# pylint: disable=use-dict-literal
from json import loads
from urllib.parse import urlencode
from dateutil import parser
about = {
"website": 'https://hub.docker.com',
"wikidata_id": 'Q100769064',
"official_api_documentation": 'https://docs.docker.com/registry/spec/api/',
"use_official_api": True,
"require_api_key": False,
"results": 'JSON',
}
categories = ['it'] # optional
paging = True
base_url = "https://hub.docker.com/"
search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25"
def request(query, params):
    """Store the Docker Hub search URL in ``params['url']`` and set the
    ``Search-Version`` request header to ``v3``.
    """
    args = {'q': query, 'page': params["pageno"]}
    params['url'] = search_url.format(query=urlencode(args))
    params["headers"]["Search-Version"] = "v3"
    return params
def response(resp):
    '''post-response callback

    Converts the ``summaries`` of a Docker Hub search response into results.

    resp: requests response object
    '''
    results = []

    # `summaries` may be `null` in the response body
    summaries = loads(resp.text).get("summaries")
    if not summaries:
        return results

    for item in summaries:
        # official images live below "_/", all other images below "r/"
        is_official = item.get("filter_type") in ["store", "official"]
        url_prefix = "_/" if is_official else "r/"

        results.append(
            {
                "url": base_url + url_prefix + item.get('slug', ""),
                "title": item.get("name"),
                "content": item.get("short_description"),
                "publishedDate": parser.parse(item.get("updated_at") or item.get("created_at")),
                "thumbnail": item["logo_url"].get("large") or item["logo_url"].get("small"),
            }
        )

    return results
+86
View File
@@ -0,0 +1,86 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Doku Wiki
"""
from urllib.parse import urlencode
from lxml.html import fromstring
from searx.utils import extract_text, eval_xpath
# about
about = {
"website": 'https://www.dokuwiki.org/',
"wikidata_id": 'Q851864',
"official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
paging = False
number_of_results = 5
# search-url
# Doku is OpenSearch compatible
base_url = 'http://localhost:8090'
search_url = (
# fmt: off
'/?do=search'
'&{query}'
# fmt: on
)
# TODO '&startRecord={offset}'
# TODO '&maximumRecords={limit}'
# do search-request
def request(query, params):
    """Store the DokuWiki search URL in ``params['url']``.

    The search term is passed to the wiki in the ``id`` argument.
    """
    search_path = search_url.format(query=urlencode({'id': query}))
    params['url'] = base_url + search_path
    return params
# get response from search-request
def response(resp):
    """Scrape results from a DokuWiki search page.

    Two sections of the page are evaluated:

    - *quick hits*: list items linking to a page, reported without content
    - *search results*: a definition list in which a ``<dt>`` carries link
      and title and the following ``<dd>`` the text snippet

    :param resp: response object whose ``text`` is the HTML page
    :return: list of result dicts
    """
    results = []
    doc = fromstring(resp.text)

    # Quickhits
    for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
        hrefs = eval_xpath(r, './/a[@class="wikilink1"]/@href')
        if not hrefs or not hrefs[-1]:
            continue

        title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
        results.append({'title': title, 'content': "", 'url': base_url + hrefs[-1]})

    # Search results: URL and title of a <dt> belong to the <dd> elements that
    # follow it.  Both are reset on every <dt>, so a <dd> is never paired with
    # a stale link from the quick hits or from an earlier entry.
    res_url = None
    title = None
    for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
        if r.tag == "dt":
            hrefs = eval_xpath(r, './/a[@class="wikilink1"]/@href')
            if hrefs and hrefs[-1]:
                res_url = hrefs[-1]
                title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
            else:
                res_url = None
                title = None
        elif r.tag == "dd":
            if not res_url:
                # snippet without a usable link
                continue
            content = extract_text(eval_xpath(r, '.'))
            results.append({'title': title, 'content': content, 'url': base_url + res_url})

    return results
+437
View File
@@ -0,0 +1,437 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
DuckDuckGo Lite
~~~~~~~~~~~~~~~
"""
from typing import TYPE_CHECKING
import re
from urllib.parse import urlencode
import json
import babel
import lxml.html
from searx import (
locales,
redislib,
external_bang,
)
from searx.utils import (
eval_xpath,
eval_xpath_getindex,
extract_text,
)
from searx.network import get # see https://github.com/searxng/searxng/issues/762
from searx import redisdb
from searx.enginelib.traits import EngineTraits
from searx.exceptions import SearxEngineAPIException
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
about = {
"website": 'https://lite.duckduckgo.com/lite/',
"wikidata_id": 'Q12805',
"use_official_api": False,
"require_api_key": False,
"results": 'HTML',
}
send_accept_language_header = True
"""DuckDuckGo-Lite tries to guess the user's preferred language from the HTTP
``Accept-Language`` header.  Optionally, the user can select a region filter
(but not a language).
"""
# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True # user can't select but the results are filtered
url = 'https://lite.duckduckgo.com/lite/'
# url_ping = 'https://duckduckgo.com/t/sl_l'
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
def cache_vqd(query, value):
    """Caches a ``vqd`` value from a query.

    The vqd value depends on the query string and is needed for the follow up
    pages or the images loaded by a XMLHttpRequest:

    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
    - DuckDuckGo Images: `https://duckduckgo.com/i.js?q=...&vqd=...`

    The value is stored for 600 seconds under a key derived from the query.
    Nothing is stored when no redis client is available.
    """
    client = redisdb.client()
    if not client:
        return

    logger.debug("cache vqd value: %s", value)
    cache_key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
    client.set(cache_key, value, ex=600)
def get_vqd(query, headers):
    """Returns the ``vqd`` that fits to the *query*.  If there is no ``vqd`` cached
    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
    response.

    :param query: the search term the vqd value belongs to
    :param headers: HTTP headers sent along with the request to DDG
    :raises SearxEngineAPIException: when the response does not contain a
        vqd value
    :return: the vqd value as string
    """
    value = None
    c = redisdb.client()
    if c:
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
        value = c.get(key)
        if value:
            value = value.decode('utf-8')
            logger.debug("re-use cached vqd value: %s", value)
            return value

    # urlencode() already produces ``q=<query>``; an additional ``q=`` in the
    # template would ask DDG for the literal term ``q=<query>``
    query_url = 'https://duckduckgo.com/?{query}&atb=v290-5'.format(query=urlencode({'q': query}))
    res = get(query_url, headers=headers)
    content = res.text  # type: ignore

    # the value is searched as ``vqd="<value>"``: it opens with a double quote,
    # so it is closed by the next double quote as well
    marker = 'vqd="'
    start = content.find(marker)
    if start == -1:
        raise SearxEngineAPIException('Request failed')
    start += len(marker)
    end = content.find('"', start)
    if end == -1:
        raise SearxEngineAPIException('Request failed')

    value = content[start:end]
    logger.debug("new vqd value: %s", value)
    cache_vqd(query, value)
    return value
def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
    """Get DuckDuckGo's language identifier from SearXNG's locale.

    DuckDuckGo defines its languages by region codes (see
    :py:obj:`fetch_traits`).  The ``lang_region`` table of the custom traits
    is consulted first; the engine's language for the locale (or ``default``)
    is the fallback.

    To get region and language of a DDG service use:

    .. code:: python

       eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
       eng_lang = get_ddg_lang(traits, params['searxng_locale'])

    It might be confusing, but the ``l`` value of the cookie is what SearXNG
    calls the *region*:

    .. code:: python

        # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
        params['cookies']['ad'] = eng_lang
        params['cookies']['ah'] = eng_region
        params['cookies']['l'] = eng_region

    .. hint::

       `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
       selection to the user, only a region can be selected by the user
       (``eng_region`` from the example above).  DDG-lite stores the selected
       region in a cookie::

         params['cookies']['kl'] = eng_region  # 'ar-es'

    """
    fallback = eng_traits.get_language(sxng_locale, default)
    lang_region = eng_traits.custom['lang_region']
    return lang_region.get(sxng_locale, fallback)  # type: ignore
ddg_reg_map = {
'tw-tzh': 'zh_TW',
'hk-tzh': 'zh_HK',
'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES
'es-ca': 'ca_ES',
'id-en': 'id_ID',
'no-no': 'nb_NO',
'jp-jp': 'ja_JP',
'kr-kr': 'ko_KR',
'xa-ar': 'ar_SA',
'sl-sl': 'sl_SI',
'th-en': 'th_TH',
'vn-en': 'vi_VN',
}
ddg_lang_map = {
# use ar --> ar_EG (Egypt's arabic)
"ar_DZ": 'lang_region',
"ar_JO": 'lang_region',
"ar_SA": 'lang_region',
# use bn --> bn_BD
'bn_IN': 'lang_region',
# use de --> de_DE
'de_CH': 'lang_region',
# use en --> en_US,
'en_AU': 'lang_region',
'en_CA': 'lang_region',
'en_GB': 'lang_region',
# Esperanto
'eo_XX': 'eo',
# use es --> es_ES,
'es_AR': 'lang_region',
'es_CL': 'lang_region',
'es_CO': 'lang_region',
'es_CR': 'lang_region',
'es_EC': 'lang_region',
'es_MX': 'lang_region',
'es_PE': 'lang_region',
'es_UY': 'lang_region',
'es_VE': 'lang_region',
# use fr --> rf_FR
'fr_CA': 'lang_region',
'fr_CH': 'lang_region',
'fr_BE': 'lang_region',
# use nl --> nl_NL
'nl_BE': 'lang_region',
# use pt --> pt_PT
'pt_BR': 'lang_region',
# skip these languages
'od_IN': 'skip',
'io_XX': 'skip',
'tokipona_XX': 'skip',
}
def request(query, params):
    """Assemble the POST request for DuckDuckGo Lite.

    The API is not documented, so this emulates what
    https://lite.duckduckgo.com/lite/ does when the "next Page" link is
    pressed again and again.
    """
    # quote ddg bangs so that DDG does not act on them
    words = []
    for word in re.split(r'(\s+)', query):
        if not word.strip():
            continue
        if word.startswith('!') and external_bang.get_node(external_bang.EXTERNAL_BANGS, word[1:]):
            word = f"'{word}'"
        words.append(word)
    query = ' '.join(words)

    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
    # eng_lang = get_ddg_lang(traits, params['searxng_locale'])

    params['url'] = url
    params['method'] = 'POST'

    form = params['data']
    form['q'] = query

    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
    params['headers']['Referer'] = 'https://google.com/'

    # The initial page has no offset, the second page starts at 30 and every
    # following page adds another 50.
    pageno = params['pageno']
    offset = None
    if pageno == 2:
        offset = 30
    elif pageno > 2:
        offset = 30 + (pageno - 2) * 50
    if offset is not None:
        form['s'] = offset
        form['dc'] = offset + 1

    # request needs a vqd argument
    form['vqd'] = get_vqd(query, params["headers"])

    # initial page does not have additional data in the input form
    if pageno > 1:
        form['o'] = form_data.get('o', 'json')
        form['api'] = form_data.get('api', 'd.js')
        form['nextParams'] = form_data.get('nextParams', '')
        form['v'] = form_data.get('v', 'l')

    cookies = params['cookies']
    form['kl'] = eng_region
    cookies['kl'] = eng_region

    form['df'] = ''
    time_range = params['time_range']
    if time_range in time_range_dict:
        form['df'] = time_range_dict[time_range]
        cookies['df'] = time_range_dict[time_range]

    logger.debug("param data: %s", params['data'])
    logger.debug("param cookies: %s", params['cookies'])
    return params
def response(resp):
    """Scrape the result rows out of a DuckDuckGo lite HTML page.

    Returns a list of ``{'title', 'content', 'url'}`` dicts; an empty list is
    returned for a 303 status or when the expected result table is missing.
    As a side effect the hidden form values are stored in ``form_data`` and
    the ``vqd`` value is passed to ``cache_vqd`` for the current query.
    """
    if resp.status_code == 303:
        return []

    doc = lxml.html.fromstring(resp.text)
    tables = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
    table_count = len(tables)

    if table_count == 2:
        # some locales (at least China) does not have a "next page" button and
        # the layout of the HTML tables is different.
        result_table = tables[1]
    elif table_count >= 3:
        result_table = tables[2]
    else:
        # no more results
        return []

    # update form data from response
    forms = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
    if len(forms):
        form = forms[0]
        for field in ('v', 'api', 'o'):
            form_data[field] = eval_xpath(form, f'//input[@name="{field}"]/@value')[0]
        logger.debug('form_data: %s', form_data)

        vqd_value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
        cache_vqd(resp.search_params['data']['q'], vqd_value)

    # the last <tr> holds the form of the 'previous/next page' links
    rows = eval_xpath(result_table, './/tr')[:-1]

    results = []
    # every result occupies a group of four rows; only the first two are read
    for start in range(0, len(rows) - 3, 4):
        title_row = rows[start]
        content_row = rows[start + 1]

        # ignore sponsored Adds <tr class="result-sponsored">
        if content_row.get('class') == 'result-sponsored':
            continue

        link = eval_xpath_getindex(title_row, './/td//a[@class="result-link"]', 0, None)
        if link is None:
            continue

        snippet = eval_xpath_getindex(content_row, './/td[@class="result-snippet"]', 0, None)
        if snippet is None:
            continue

        results.append(
            {
                'title': link.text_content(),
                'content': extract_text(snippet),
                'url': link.get('href'),
            }
        )

    return results
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages & regions from DuckDuckGo.

    SearXNG's ``all`` locale maps DuckDuckGo's "All regions" (``wt-wt``).
    DuckDuckGo's language "Browser's preferred language" (``wt_WT``) makes no
    sense in a SearXNG request since SearXNG's ``all`` will not add an
    ``Accept-Language`` HTTP header.  The value in ``engine_traits.all_locale``
    is ``wt-wt`` (the region).

    Besides regions DuckDuckGo also defines its languages by region codes.  For
    example these are the English languages in DuckDuckGo:

    - en_US
    - en_AU
    - en_CA
    - en_GB

    The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
    SearXNG's locale.

    The results are written into ``engine_traits.regions``,
    ``engine_traits.languages`` and ``engine_traits.custom['lang_region']``;
    problems are reported with ``print`` and the affected entry is skipped.
    """
    # pylint: disable=too-many-branches, too-many-statements
    # fetch regions

    engine_traits.all_locale = 'wt-wt'

    # updated from u588 to u661 / should be updated automatically?
    resp = get('https://duckduckgo.com/util/u661.js')

    # NOTE(review): only a message is printed here; execution continues and
    # the parsing below runs on whatever body was returned.
    if not resp.ok:  # type: ignore
        print("ERROR: response from DuckDuckGo is not OK.")

    # 'regions:{' is 9 characters long, so +8 positions on the opening brace
    pos = resp.text.find('regions:{') + 8  # type: ignore
    js_code = resp.text[pos:]  # type: ignore
    # cut after the first closing brace; the slice is then parsed as JSON
    pos = js_code.find('}') + 1
    regions = json.loads(js_code[:pos])

    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            engine_traits.all_locale = 'wt-wt'
            continue

        # ddg_reg_map may provide an explicit locale or the marker 'skip'
        region = ddg_reg_map.get(eng_tag)
        if region == 'skip':
            continue

        if not region:
            # engine tags are '<territory>-<lang>'; build '<lang>_<TERRITORY>'
            eng_territory, eng_lang = eng_tag.split('-')
            region = eng_lang + '_' + eng_territory.upper()

        try:
            sxng_tag = locales.region_tag(babel.Locale.parse(region))
        except babel.UnknownLocaleError:
            print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
            continue

        # the first engine tag registered for a SearXNG tag wins
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag

    # fetch languages

    engine_traits.custom['lang_region'] = {}

    # 'languages:{' is 11 characters long, so +10 positions on the opening brace
    pos = resp.text.find('languages:{') + 10  # type: ignore
    js_code = resp.text[pos:]  # type: ignore
    pos = js_code.find('}') + 1
    # the object uses bare keys: add the quotes around them before JSON parsing
    js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
    languages = json.loads(js_code)

    for eng_lang, name in languages.items():

        if eng_lang == 'wt_WT':
            continue

        # ddg_lang_map may rename the tag or mark it as 'skip' / 'lang_region'
        babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
        if babel_tag == 'skip':
            continue

        try:

            if babel_tag == 'lang_region':
                # stored in the custom table, keyed by the SearXNG region tag
                sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
                engine_traits.custom['lang_region'][sxng_tag] = eng_lang
                continue

            sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))

        except babel.UnknownLocaleError:
            print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
            continue

        # the first engine language registered for a SearXNG tag wins
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang
+255
View File
@@ -0,0 +1,255 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
DuckDuckGo Instant Answer API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented, but from
reverse engineering we can see that some services (e.g. instant answers) are
still in use by the DDG search engine.
As far as we can tell, the *instant answers* API does not support languages, or
at least we could not find out how language support should work. It seems that
most of the features are based on English terms.
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode, urlparse, urljoin
from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
if TYPE_CHECKING:
import logging
logger: logging.Logger
# about: engine metadata
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine flag; presumably read by the SearXNG framework -- confirm there
send_accept_language_header = True

# request URL template; ``{query}`` receives the url-encoded ``q=...`` pair
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

# both URL schemes under which a Wikidata entity (unit) may be referenced
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']

# helper built by get_string_replaces_function with the mapping 'http:' -> 'https:'
replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
def is_broken_text(text):
    """Tell whether ``text`` looks like one of DuckDuckGo's broken link texts.

    duckduckgo may return something like
    ``<a href="xxxx">http://somewhere Related website<a/>``: the href URL is
    broken and the "Related website" part may contain some HTML.  Such a text
    starts with ``http`` and contains a space; the best solution seems to be
    to ignore these results.
    """
    if not text.startswith('http'):
        return False
    return ' ' in text
def result_to_text(text, htmlResult):
    """Return the display text of a DuckDuckGo result or ``None`` if broken.

    The text of the first ``<a>`` element found in ``htmlResult`` is
    preferred; without any link the plain ``text`` is used.  ``None`` is
    returned when the chosen text is rejected by ``is_broken_text``.
    """
    # TODO : remove result ending with "Meaning" or "Category"  # pylint: disable=fixme
    anchors = html.fromstring(htmlResult).xpath('//a')
    candidate = extract_text(anchors[0]) if len(anchors) >= 1 else text
    return None if is_broken_text(candidate) else candidate
def request(query, params):
    """Store the instant-answer API URL for ``query`` in ``params['url']``."""
    encoded_query = urlencode({'q': query})
    params['url'] = URL.format(query=encoded_query)
    return params
def response(resp):
    """Convert the JSON answer of the instant-answer API into result items.

    Depending on the fields present, the returned list may contain an
    ``answer`` item, plain ``url``/``title`` results, ``suggestion`` items and
    finally either a single plain result or an ``infobox`` item built from the
    heading, content, image, attributes, urls and related topics.
    """
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    results = []

    search_res = resp.json()

    # search_res.get('Entity') possible values (not exhaustive) :
    # * continent / country / department / location / waterfall
    # * actor / musician / artist
    # * book / performing art / film / television  / media franchise / concert tour / playwright
    # * prepared food
    # * website / software / os / programming language / file format / software engineer
    # * company

    # accumulators for the infobox assembled at the end of the function
    content = ''
    heading = search_res.get('Heading', '')
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer:
        logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer)
        # answers of type 'calc' and 'ip' are not forwarded
        if search_res.get('AnswerType') not in ['calc', 'ip']:
            results.append({'answer': html_to_text(answer)})

    # add infobox
    if 'Definition' in search_res:
        content = content + search_res.get('Definition', '')

    if 'Abstract' in search_res:
        content = content + search_res.get('Abstract', '')

    # image
    image = search_res.get('Image')
    image = None if image == '' else image
    # an image URL without host part is resolved against duckduckgo.com
    if image is not None and urlparse(image).netloc == '':
        image = urljoin('https://duckduckgo.com', image)

    # urls
    # Official website, Wikipedia page
    for ddg_result in search_res.get('Results', []):
        firstURL = ddg_result.get('FirstURL')
        text = ddg_result.get('Text')
        if firstURL is not None and text is not None:
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics
    for ddg_result in search_res.get('RelatedTopics', []):
        if 'FirstURL' in ddg_result:
            # a single related topic becomes a suggestion
            firstURL = ddg_result.get('FirstURL')
            text = ddg_result.get('Text')
            if not is_broken_text(text):
                suggestion = result_to_text(text, ddg_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            # a named group of topics; the list is registered first and filled below
            suggestions = []
            relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics', []):
                suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL', '')
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True})
        results.append({'url': abstractURL, 'title': heading})

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        # note: overrides an infobox_id taken from the abstract URL above
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # attributes
    # some will be converted to urls
    if 'Infobox' in search_res:
        infobox = search_res.get('Infobox')
        if 'content' in infobox:
            # default zoom, replaced when an 'area' attribute is found
            osm_zoom = 17
            coordinates = None
            for info in infobox.get('content'):
                data_type = info.get('data_type')
                data_label = info.get('label')
                data_value = info.get('value')

                # Workaround: ddg may return a double quote
                if data_value == '""':
                    continue

                # Is it an external URL ?
                # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                # * netflix_id
                external_url = get_external_url(data_type, data_value)
                if external_url is not None:
                    urls.append({'title': data_label, 'url': external_url})
                elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
                    # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                    # ignore wiki_maps_trigger: reference to a javascript
                    # ignore google_play_artist_id: service shutdown
                    pass
                elif data_type == 'string' and data_label == 'Website':
                    # There is already an URL for the website
                    pass
                elif data_type == 'area':
                    attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
                    osm_zoom = area_to_osm_zoom(data_value.get('amount'))
                elif data_type == 'coordinates':
                    if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
                        # coordinate on Earth
                        # get the zoom information from the area
                        coordinates = info
                    else:
                        # coordinate NOT on Earth
                        attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
                elif data_type == 'string':
                    attributes.append({'label': data_label, 'value': data_value})

            # handled after the loop so that osm_zoom reflects an 'area' entry
            # regardless of the order of the infobox items
            if coordinates:
                data_label = coordinates.get('label')
                data_value = coordinates.get('value')
                latitude = data_value.get('latitude')
                longitude = data_value.get('longitude')
                url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
                urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'    # pylint: disable=fixme
        # with exactly one URL and nothing else a plain result is emitted
        # instead of an infobox
        if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
            results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
        else:
            results.append(
                {
                    'infobox': heading,
                    'id': infobox_id,
                    'content': content,
                    'img_src': image,
                    'attributes': attributes,
                    'urls': urls,
                    'relatedTopics': relatedTopics,
                }
            )

    return results
def unit_to_str(unit):
    """Translate a Wikidata unit URL into the entry of ``WIKIDATA_UNITS``.

    When ``unit`` starts with one of the ``WIKIDATA_PREFIX`` values, the rest
    of the string is looked up in ``WIKIDATA_UNITS``.  ``unit`` itself is
    returned when no prefix matches or the entity is not in the table.
    """
    matching_prefix = next((prefix for prefix in WIKIDATA_PREFIX if unit.startswith(prefix)), None)
    if matching_prefix is None:
        return unit
    wikidata_entity = unit[len(matching_prefix) :]
    return WIKIDATA_UNITS.get(wikidata_entity, unit)
def area_to_str(area):
    """Format an area value such as
    ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``.

    Returns ``'<amount as float> <unit>'`` with the unit translated by
    ``unit_to_str``.  When the unit or the amount is missing, or the amount
    cannot be converted to a float, the raw values are joined instead
    (a missing key is rendered as an empty string).
    """
    raw_unit = area.get('unit')
    raw_amount = area.get('amount')

    # Only attempt the conversion when both parts are present: unit_to_str
    # cannot handle None and float(None) raises TypeError.
    if raw_unit is not None and raw_amount is not None:
        try:
            return '{} {}'.format(float(raw_amount), unit_to_str(raw_unit))
        except (TypeError, ValueError):
            # amount is not a number: fall back to the raw representation
            pass

    return '{} {}'.format(area.get('amount', ''), area.get('unit', ''))
+100
View File
@@ -0,0 +1,100 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DuckDuckGo Images
~~~~~~~~~~~~~~~~~
"""
from typing import TYPE_CHECKING
from urllib.parse import urlencode
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
from searx.engines.duckduckgo import (
get_ddg_lang,
get_vqd,
)
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about: engine metadata
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON (site requires js to get images)',
}

# engine dependent config
categories = ['images', 'web']
paging = True
safesearch = True
send_accept_language_header = True

# Values for the 'p' cookie and the 'p' URL argument, keyed by the SearXNG
# safesearch level; ``None`` means the cookie / argument is not sent.
# NOTE(review): in safesearch_args the levels 0 and 2 both map to '1' while
# the cookie table distinguishes them -- confirm this is intended.
safesearch_cookies = {0: '-2', 1: None, 2: '1'}
safesearch_args = {0: '1', 1: None, 2: '1'}
def request(query, params):
    """Build the ``i.js`` image search request of DuckDuckGo.

    URL arguments (query, region, ``vqd`` token, paging offset, safesearch),
    the locale / safesearch cookies and the XHR-like headers are written
    into ``params``, which is returned.
    """
    locale = params['searxng_locale']
    eng_region = traits.get_region(locale, traits.all_locale)
    eng_lang = get_ddg_lang(traits, locale)

    args = {
        'q': query,
        'o': 'json',
        # 'u': 'bing',
        'l': eng_region,
        'vqd': get_vqd(query, params["headers"]),
    }

    # every page holds 100 images
    pageno = params['pageno']
    if pageno > 1:
        args['s'] = (pageno - 1) * 100

    cookies = params['cookies']
    cookies['ad'] = eng_lang  # zh_CN
    cookies['ah'] = eng_region  # "us-en,de-de"
    cookies['l'] = eng_region  # "hk-tzh"
    logger.debug("cookies: %s", params['cookies'])

    level = params['safesearch']
    cookie_value = safesearch_cookies.get(level)
    if cookie_value is not None:
        cookies['p'] = cookie_value  # "-2", "1"
    arg_value = safesearch_args.get(level)
    if arg_value is not None:
        args['p'] = arg_value  # "-1", "1"

    params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=urlencode(args), f=',,,,,')

    headers = params['headers']
    headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'
    headers['Referer'] = 'https://duckduckgo.com/'
    headers['X-Requested-With'] = 'XMLHttpRequest'
    logger.debug("headers: %s", params['headers'])

    return params
def response(resp):
    """Map every entry of the JSON ``results`` list to an image result."""
    return [
        {
            'template': 'images.html',
            'title': item['title'],
            'content': '',
            'thumbnail_src': item['thumbnail'],
            'img_src': item['image'],
            'url': item['url'],
            'img_format': '%s x %s' % (item['width'], item['height']),
            'source': item['source'],
        }
        for item in resp.json()['results']
    ]
+163
View File
@@ -0,0 +1,163 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
DuckDuckGo Weather
~~~~~~~~~~~~~~~~~~
"""
from typing import TYPE_CHECKING
from json import loads
from urllib.parse import quote
from datetime import datetime
from flask_babel import gettext
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
from searx.engines.duckduckgo import get_ddg_lang
from searx.enginelib.traits import EngineTraits
if TYPE_CHECKING:
import logging
logger: logging.Logger
traits: EngineTraits
# about: engine metadata
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

send_accept_language_header = True

# engine dependent config
categories = ["weather"]
# URL template: ``{query}`` is the quoted search term, ``{lang}`` the language code
URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
def generate_condition_table(condition):
    """Render one weather condition record as a sequence of HTML table rows.

    ``condition`` must provide the keys ``summary``, ``temperature``,
    ``apparentTemperature``, ``windBearing``, ``windSpeed``, ``visibility``
    and ``humidity``.  Temperatures are shown in °C and °F, the wind speed in
    km/h and mph and the humidity as a percentage.  Six complete
    ``<tr>...</tr>`` rows are returned as one string (without the enclosing
    table markup).
    """
    rows = [
        f"<tr><td><b>{gettext('Condition')}</b></td>" f"<td><b>{condition['summary']}</b></td></tr>",
        (
            f"<tr><td><b>{gettext('Temperature')}</b></td>"
            f"<td><b>{f_to_c(condition['temperature'])}°C / {condition['temperature']}°F</b></td></tr>"
        ),
        (
            f"<tr><td>{gettext('Feels like')}</td><td>{f_to_c(condition['apparentTemperature'])}°C / "
            f"{condition['apparentTemperature']}°F</td></tr>"
        ),
        (
            # wind speed is multiplied by the miles -> kilometres factor
            f"<tr><td>{gettext('Wind')}</td><td>{condition['windBearing']}° — "
            f"{(condition['windSpeed'] * 1.6093440006147):.2f} km/h / {condition['windSpeed']} mph</td></tr>"
        ),
        # this row used to be emitted without its closing </tr>
        f"<tr><td>{gettext('Visibility')}</td><td>{condition['visibility']} km</td></tr>",
        f"<tr><td>{gettext('Humidity')}</td><td>{(condition['humidity'] * 100):.1f}%</td></tr>",
    ]
    return "".join(rows)
def generate_day_table(day):
    """Render the daily summary (min/max temperature, UV index, sunrise and
    sunset) as HTML table rows.

    Sunrise and sunset timestamps are converted with
    ``datetime.fromtimestamp`` and shown as ``HH:MM``.
    """

    def temperature_cell(key):
        # same value in both units
        return f"{f_to_c(day[key])}°C / {day[key]}°F"

    def clock_cell(key):
        return datetime.fromtimestamp(day[key]).strftime('%H:%M')

    min_row = f"<tr><td>{gettext('Min temp.')}</td><td>{temperature_cell('temperatureLow')}</td></tr>"
    max_row = f"<tr><td>{gettext('Max temp.')}</td><td>{temperature_cell('temperatureHigh')}</td></tr>"
    uv_row = f"<tr><td>{gettext('UV index')}</td><td>{day['uvIndex']}</td></tr>"
    sunrise_row = f"<tr><td>{gettext('Sunrise')}</td><td>{clock_cell('sunriseTime')}</td></tr>"
    sunset_row = f"<tr><td>{gettext('Sunset')}</td><td>{clock_cell('sunsetTime')}</td></tr>"

    return "".join((min_row, max_row, uv_row, sunrise_row, sunset_row))
def request(query, params):
    """Set the locale cookies and the forecast URL for ``query``.

    Only the language part (text before the first ``_``) of the DuckDuckGo
    language tag is put into the URL.
    """
    locale = params['searxng_locale']
    eng_region = traits.get_region(locale, traits.all_locale)
    eng_lang = get_ddg_lang(traits, locale)

    # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
    cookies = params['cookies']
    cookies['ad'] = eng_lang
    cookies['ah'] = eng_region
    cookies['l'] = eng_region
    logger.debug("cookies: %s", params['cookies'])

    language_code = eng_lang.split('_')[0]
    params["url"] = URL.format(query=quote(query), lang=language_code)
    return params
def f_to_c(temperature):
    """Convert degrees Fahrenheit to Celsius, formatted with two decimals."""
    celsius = (temperature - 32) / 1.8
    return f"{celsius:.2f}"
def response(resp):
    """Build a single infobox with the current condition and the hourly
    forecast (grouped by day) from the forecast answer.

    An empty list is returned when the body is just the empty callback
    ``ddg_spice_forecast();``.
    """
    results = []

    if resp.text.strip() == "ddg_spice_forecast();":
        return []

    # The JSON document is taken from the text between the first newline and
    # two characters before the last newline (presumably the callback wrapper
    # around the payload -- confirm against a live response).
    result = loads(resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2])

    current = result["currently"]

    title = result['flags']['ddg-location']

    infobox = f"<h3>{gettext('Current condition')}</h3><table><tbody>"

    infobox += generate_condition_table(current)

    infobox += "</tbody></table>"

    # date of the previously processed hourly entry; None before the first one
    last_date = None

    for time in result['hourly']['data']:
        current_time = datetime.fromtimestamp(time['time'])

        if last_date != current_time.date():
            # a new day starts: close the table of the previous day (if any)
            if last_date is not None:
                infobox += "</tbody></table>"

            infobox += f"<h3>{current_time.strftime('%Y-%m-%d')}</h3>"

            infobox += "<table><tbody>"

            # add the daily summary that belongs to this date
            for day in result['daily']['data']:
                if datetime.fromtimestamp(day['time']).date() == current_time.date():
                    infobox += generate_day_table(day)

            # the hourly rows of the day go into a table of their own
            infobox += "</tbody></table><table><tbody>"

        last_date = current_time.date()

        # time cell spanning this row plus the rows of the condition table
        infobox += f"<tr><td rowspan=\"7\"><b>{current_time.strftime('%H:%M')}</b></td></tr>"

        infobox += generate_condition_table(time)

    infobox += "</tbody></table>"

    results.append(
        {
            "infobox": title,
            "content": infobox,
        }
    )

    return results
+83
View File
@@ -0,0 +1,83 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Duden
"""
import re
from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
from searx.network import raise_for_httperror
# about: engine metadata
about = {
    "website": 'https://www.duden.de',
    "wikidata_id": 'Q73624591',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "language": 'de',
}

categories = ['dictionaries']
paging = True

# search-url (used for follow-up pages; ``{offset}`` is the zero-based page index)
base_url = 'https://www.duden.de/'
search_url = base_url + 'suchen/dudenonline/{query}?search_api_fulltext=&page={offset}'
def request(query, params):
    '''pre-request callback

    Sets ``params['url']`` for the requested page and relaxes redirect and
    HTTP error handling.

    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {} # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1 # number of the requested page
    '''
    quoted_query = quote(query)
    page_index = params['pageno'] - 1

    if page_index != 0:
        params['url'] = search_url.format(offset=page_index, query=quoted_query)
    else:
        # the first page is requested without the paging arguments
        first_page_url = base_url + 'suchen/dudenonline/{query}'
        params['url'] = first_page_url.format(query=quoted_query)

    # after the last page of results, spelling corrections are returned after a HTTP redirect
    # whatever the page number is
    params['soft_max_redirects'] = 1
    params['raise_for_httperror'] = False
    return params
def response(resp):
    '''post-response callback

    resp: requests response object

    Returns an empty list for a 404 answer; other HTTP errors are raised by
    ``raise_for_httperror``.  The list starts with a ``number_of_results``
    item when the counter of the active tab contains digits, followed by one
    ``url`` / ``title`` / ``content`` item per result section.
    '''
    results = []

    if resp.status_code == 404:
        return results

    raise_for_httperror(resp)

    dom = html.fromstring(resp.text)

    number_of_results_element = eval_xpath_getindex(
        dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None
    )
    if number_of_results_element is not None:
        number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
        # int('') raises ValueError: skip the counter when it holds no digit
        if number_of_results_string:
            results.append({'number_of_results': int(number_of_results_string)})

    for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
        url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
        # links in the page may be relative
        url = urljoin(base_url, url)
        title = eval_xpath(result, 'string(.//h2/a)').strip()
        content = extract_text(eval_xpath(result, './/p'))
        # append result
        results.append({'url': url, 'title': title, 'content': content})

    return results
+22
View File
@@ -0,0 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dummy Offline
"""
# about: engine metadata (this dummy engine has no website entry)
about = {
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}
def search(query, request_params):
    """Return one fixed result; both arguments are ignored."""
    fixed_result = {'result': 'this is what you get'}
    return [fixed_result]
+24
View File
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dummy
"""
# about: engine metadata of the dummy engine (no website, no API)
about = {
    "website": None,
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'empty array',
}
# do search-request
def request(query, params):
    """Hand back ``params`` untouched; ``query`` is not used."""
    unchanged_params = params
    return unchanged_params
# get response from search-request
def response(resp):
    """Return a new empty result list, whatever ``resp`` is."""
    return list()
+76
View File
@@ -0,0 +1,76 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Ebay (Videos, Music, Files)
"""
from lxml import html
from searx.engines.xpath import extract_text
from urllib.parse import quote
# about: engine metadata
about = {
    "website": 'https://www.ebay.com',
    "wikidata_id": 'Q58024',
    "official_api_documentation": 'https://developer.ebay.com/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['shopping']
paging = True

# Set base_url in settings.yml in order to
# have the desired local TLD.
base_url = None
# path and arguments appended to base_url; ``{pageno}`` is filled with the page number
search_url = '/sch/i.html?_nkw={query}&_sacat={pageno}'

# XPath of one result item, followed by the XPaths evaluated relative to it
results_xpath = '//li[contains(@class, "s-item")]'
url_xpath = './/a[@class="s-item__link"]/@href'
title_xpath = './/h3[@class="s-item__title"]'
# NOTE(review): this matches on an attribute literally named "span" -- confirm against the page markup
content_xpath = './/div[@span="SECONDARY_INFO"]'
price_xpath = './/div[contains(@class, "s-item__detail")]/span[@class="s-item__price"][1]/text()'
shipping_xpath = './/span[contains(@class, "s-item__shipping")]/text()'
source_country_xpath = './/span[contains(@class, "s-item__location")]/text()'
thumbnail_xpath = './/img[@class="s-item__image-img"]/@src'
def request(query, params):
    """Compose the search URL from ``base_url`` and ``search_url``."""
    path_and_args = search_url.format(query=quote(query), pageno=params['pageno'])
    params['url'] = f'{base_url}' + path_and_args
    return params
def response(resp):
    """Extract the product results from the HTML result page.

    Every node matched by ``results_xpath`` yields one ``products.html``
    result; items with an empty title are skipped.
    """
    items = html.fromstring(resp.text).xpath(results_xpath)
    if not items:
        return []

    # result key -> XPath evaluated relative to the item node
    field_xpaths = (
        ('url', url_xpath),
        ('title', title_xpath),
        ('content', content_xpath),
        ('price', price_xpath),
        ('shipping', shipping_xpath),
        ('source_country', source_country_xpath),
        ('thumbnail', thumbnail_xpath),
    )

    results = []
    for item in items:
        entry = {}
        for field, xpath in field_xpaths:
            entry[field] = extract_text(item.xpath(xpath))

        if entry['title'] == "":
            continue

        entry['template'] = 'products.html'
        results.append(entry)

    return results
+178
View File
@@ -0,0 +1,178 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. sidebar:: info
- :origin:`elasticsearch.py <searx/engines/elasticsearch.py>`
- `Elasticsearch <https://www.elastic.co/elasticsearch/>`_
- `Elasticsearch Guide
<https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_
- `Install Elasticsearch
<https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_
Elasticsearch_ supports numerous ways to query the data it is storing. At the
moment the engine supports the most popular search methods (``query_type``):
- ``match``,
- ``simple_query_string``,
- ``term`` and
- ``terms``.
If none of the methods fit your use case, you can select ``custom`` query type
and provide the JSON payload to submit to Elasticsearch in
``custom_query_json``.
Example
=======
The following is an example configuration for an Elasticsearch_ instance with
authentication configured to read from ``my-index`` index.
.. code:: yaml
- name: elasticsearch
shortcut: es
engine: elasticsearch
base_url: http://localhost:9200
username: elastic
password: changeme
index: my-index
query_type: match
# custom_query_json: '{ ... }'
enable_http: true
"""
from json import loads, dumps
from searx.exceptions import SearxEngineAPIException
# Engine options and their defaults (see the configuration example in the
# module docstring).
base_url = 'http://localhost:9200'
username = ''
password = ''
index = ''
# NOTE(review): evaluated once, right here, from the defaults above; a later
# change of ``base_url`` or ``index`` does not alter this value.
search_url = base_url + '/' + index + '/_search'
# one of the keys of ``_available_query_types``
query_type = 'match'
# template used by the ``custom`` query type
custom_query_json = {}
# when true, index / id / score of every hit are added to the result
show_metadata = False
categories = ['general']
def init(engine_settings):
    """Validate the engine configuration.

    Raises ``ValueError`` when ``engine_settings`` names a query type that is
    not in ``_available_query_types`` or when ``index`` is empty.
    """
    if 'query_type' in engine_settings:
        requested_type = engine_settings['query_type']
        if requested_type not in _available_query_types:
            raise ValueError('unsupported query type', requested_type)

    if index == '':
        raise ValueError('index cannot be empty')
def request(query, params):
    """Prepare the ``_search`` request for the configured index.

    ``params`` is returned untouched when ``query_type`` is unknown.
    Otherwise the URL, optional basic-auth credentials, the JSON request body
    built by the selected query builder and the content type are set.

    Raises ``ValueError`` (from the query builders) for malformed queries.
    """
    if query_type not in _available_query_types:
        return params

    if username and password:
        params['auth'] = (username, password)

    # Build the URL at request time: the module-level ``search_url`` is
    # evaluated once from the default ``base_url`` / ``index`` and therefore
    # does not reflect the values configured for this engine.
    params['url'] = base_url + '/' + index + '/_search'
    params['method'] = 'GET'
    params['data'] = dumps(_available_query_types[query_type](query))
    params['headers']['Content-Type'] = 'application/json'

    return params
def _match_query(query):
"""
The standard for full text queries.
searx format: "key:value" e.g. city:berlin
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
"""
try:
key, value = query.split(':')
except Exception as e:
raise ValueError('query format must be "key:value"') from e
return {"query": {"match": {key: {'query': value}}}}
def _simple_query_string_query(query):
"""
Accepts query strings, but it is less strict than query_string
The field used can be specified in index.query.default_field in Elasticsearch.
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
"""
return {'query': {'simple_query_string': {'query': query}}}
def _term_query(query):
"""
Accepts one term and the name of the field.
searx format: "key:value" e.g. city:berlin
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
"""
try:
key, value = query.split(':')
except Exception as e:
raise ValueError('query format must be key:value') from e
return {'query': {'term': {key: value}}}
def _terms_query(query):
"""
Accepts multiple terms and the name of the field.
searx format: "key:value1,value2" e.g. city:berlin,paris
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
"""
try:
key, values = query.split(':')
except Exception as e:
raise ValueError('query format must be key:value1,value2') from e
return {'query': {'terms': {key: values.split(',')}}}
def _custom_query(query):
    """
    Build the request body from the administrator's ``custom_query_json``.
    searx format: "key:value" e.g. city:berlin

    Every dict key equal to ``{{KEY}}`` is replaced by the key of the query
    and every value equal to ``{{VALUE}}`` by its value, at any nesting level
    of the template.  A new structure is returned; the configured template is
    never modified, so the placeholders stay available for later requests.
    A template given as a JSON string is parsed first.

    Raises ``ValueError`` when the query contains no ``:``.
    """
    try:
        key, value = query.split(':', 1)
    except ValueError as e:
        raise ValueError('query format must be key:value') from e

    template = custom_query_json
    if isinstance(template, str):
        template = loads(template)

    def substitute(node):
        # rebuild dicts and lists instead of changing them in place
        if isinstance(node, dict):
            return {(key if name == '{{KEY}}' else name): substitute(item) for name, item in node.items()}
        if isinstance(node, list):
            return [substitute(item) for item in node]
        return value if node == '{{VALUE}}' else node

    return substitute(template)
def response(resp):
    """Turn the hits of an Elasticsearch answer into ``key-value.html`` results.

    Values of ``_source`` are converted to strings unless the field name
    starts with an underscore.  With ``show_metadata`` enabled, index, id and
    score of the hit are added under ``metadata``.

    Raises ``SearxEngineAPIException`` when the answer contains ``error``.
    """
    resp_json = loads(resp.text)
    if 'error' in resp_json:
        raise SearxEngineAPIException(resp_json['error'])

    results = []
    for hit in resp_json['hits']['hits']:
        row = {}
        for field, field_value in hit['_source'].items():
            row[field] = field_value if field.startswith('_') else str(field_value)
        row['template'] = 'key-value.html'

        if show_metadata:
            row['metadata'] = {'index': hit['_index'], 'id': hit['_id'], 'score': hit['_score']}

        results.append(row)

    return results
# Dispatch table: value of ``query_type`` -> function that builds the request
# body from the user's query string.
_available_query_types = {
    # Full text queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
    'match': _match_query,
    'simple_query_string': _simple_query_string_query,
    # Term-level queries
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
    'term': _term_query,
    'terms': _terms_query,
    # Query JSON defined by the instance administrator.
    'custom': _custom_query,
}
+67
View File
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Emojipedia
Emojipedia is an emoji reference website which documents the meaning and
common usage of emoji characters in the Unicode Standard. It is owned by Zedge
since 2021. Emojipedia is a voting member of The Unicode Consortium.[1]
[1] https://en.wikipedia.org/wiki/Emojipedia
"""
from urllib.parse import urlencode
from lxml import html
from searx.utils import (
eval_xpath_list,
eval_xpath_getindex,
extract_text,
)
# about: engine metadata
about = {
    "website": 'https://emojipedia.org',
    "wikidata_id": 'Q22908129',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config: no category, no paging, no time range
categories = []
paging = False
time_range_support = False

# search-url; ``{query}`` receives the url-encoded ``q=...`` pair
base_url = 'https://emojipedia.org'
search_url = base_url + '/search/?{query}'
def request(query, params):
    """Store the Emojipedia search URL for ``query`` in ``params['url']``."""
    encoded_query = urlencode({'q': query})
    params['url'] = search_url.format(query=encoded_query)
    return params
def response(resp):
    """Collect the entries of the ``search-results`` list.

    Processing stops at the first entry whose description contains
    ``No results found.``.
    """
    found = []
    dom = html.fromstring(resp.text)

    for entry in eval_xpath_list(dom, "//ol[@class='search-results']/li"):
        description = extract_text(eval_xpath_getindex(entry, './/p', 0))
        if 'No results found.' in description:
            break

        anchor = eval_xpath_getindex(entry, './/h2/a', 0)
        found.append(
            {
                # the href is appended to base_url as it is
                'url': base_url + anchor.attrib.get('href'),
                'title': extract_text(anchor),
                'content': description,
            }
        )

    return found
+54
View File
@@ -0,0 +1,54 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
F-Droid (a repository of FOSS applications for Android)
"""
from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text
# about: engine metadata
about = {
    "website": 'https://f-droid.org/',
    "wikidata_id": 'Q1386210',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['files', 'apps']
paging = True

# search-url; ``{query}`` receives the complete url-encoded argument string
base_url = 'https://search.f-droid.org/'
search_url = base_url + '?{query}'
# do search-request
def request(query, params):
    """Set the search URL with the query, the page number and an empty
    ``lang`` argument."""
    url_args = urlencode({'q': query, 'page': params['pageno'], 'lang': ''})
    params['url'] = search_url.format(query=url_args)
    return params
# get response from search-request
def response(resp):
    """Create one result per ``package-header`` link of the result page.

    The content is made of the package summary and the package license,
    joined by `` - ``.
    """

    def stripped_text(node, xpath):
        return extract_text(node.xpath(xpath)).strip()

    results = []
    dom = html.fromstring(resp.text)

    for package in dom.xpath('//a[@class="package-header"]'):
        url = package.xpath('./@href')[0]
        title = extract_text(package.xpath('./div/h4[@class="package-name"]/text()'))
        summary = stripped_text(package, './div/div/span[@class="package-summary"]')
        license_name = stripped_text(package, './div/div/span[@class="package-license"]')
        icon = package.xpath('./img[@class="package-icon"]/@src')[0]

        results.append(
            {
                'url': url,
                'title': title,
                'content': summary + ' - ' + license_name,
                'img_src': icon,
            }
        )

    return results

Some files were not shown because too many files have changed in this diff Show More