Compare commits
251 Commits
0f2c464392
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| c89b8129ec | |||
| c98b2d8232 | |||
| 3a9b3786c5 | |||
| 1131eceb94 | |||
| 586be50567 | |||
| d0e08b8fc1 | |||
| 487117b31a | |||
| 82fa90d843 | |||
| ef1e7432ee | |||
| 427ea20e43 | |||
| 3298219ccf | |||
| b30c7f3a8a | |||
| 374511c123 | |||
| 313f7f1c21 | |||
| 6aa78525c2 | |||
| 5e0de23ff7 | |||
| 3c655dabcf | |||
| 60e59f95a8 | |||
| cdf37fb2c9 | |||
| f56b734cb4 | |||
| 10c4373c0e | |||
| 2619d86dc1 | |||
| 1b679a4f09 | |||
| 02033cd3f9 | |||
| d932bc57d0 | |||
| c4cfa8081f | |||
| 90c9094a6f | |||
| 36c5f2b1e3 | |||
| 7bc2729ef6 | |||
| 9d79f828e4 | |||
| 22b3659cdf | |||
| 4440fddb2b | |||
| d74000fbd0 | |||
| 9efdb5c781 | |||
| e7fd52616d | |||
| b7b4b4d36d | |||
| 40f5a3ce0d | |||
| 8e43118661 | |||
| 0e76f3cef1 | |||
| 8d18ab7059 | |||
| 6a959d85c3 | |||
| 79c583eca3 | |||
| 7fcafed3f9 | |||
| 0b67830d7f | |||
| d878a0f9b8 | |||
| 9c38910a67 | |||
| 4a38a9421d | |||
| 5d32693925 | |||
| 696cecfecb | |||
| c0360a14b9 | |||
| 5589594d2c | |||
| 0bd41b2fb1 | |||
| 5f10d0366e | |||
| 9f98101c5d | |||
| 0c20676590 | |||
| 59e2c2b9a3 | |||
| 61d49e85a3 | |||
| 8f84ed6d83 | |||
| 24cbb02bff | |||
| 685a472572 | |||
| 9de1bee542 | |||
| 306e2c14db | |||
| 63b47b59b5 | |||
| 52bd2d9fa2 | |||
| 034ad17cf9 | |||
| ef2846562b | |||
| 52e2102f93 | |||
| a9b21912db | |||
| c9e4aeb8d6 | |||
| 96a44d5da6 | |||
| 0d936677e5 | |||
| 5c38b92bc8 | |||
| fc5c882193 | |||
| a4501d4034 | |||
| c0958d8f02 | |||
| 2a97864a06 | |||
| bd23339edd | |||
| f8091a5c76 | |||
| 6a2639f931 | |||
| fae5e119d1 | |||
| 872038d0c9 | |||
| 7a6db9fcfd | |||
| d6a8979d55 | |||
| a9a8a708d3 | |||
| 749c0d500d | |||
| 8f112af65b | |||
| 580e9b9aed | |||
| c77db36865 | |||
| e11dc22999 | |||
| 862ddd42f8 | |||
| d0e7e52150 | |||
| f0aa9941d8 | |||
| 6789451bd6 | |||
| b8d9e62954 | |||
| 9f36dabcdc | |||
| 3c78e9c140 | |||
| 40a976f712 | |||
| 451be4ab0d | |||
| b848d6b807 | |||
| 4695839df4 | |||
| cb92ebc70e | |||
| 5c3bc0317c | |||
| c7dd9f2229 | |||
| 7258d150ad | |||
| 020d6ecb79 | |||
| b3cc235164 | |||
| b422a55c02 | |||
| b6d2e4ee62 | |||
| 438cf57127 | |||
| 6ada434618 | |||
| 2a6c905eb8 | |||
| 9ad21c5846 | |||
| 2bab864d5e | |||
| ebae1ed990 | |||
| 5e49b6774b | |||
| 228413d780 | |||
| 7279ade925 | |||
| a826c85e08 | |||
| 1918b83061 | |||
| fa54b956a7 | |||
| d69c74e537 | |||
| 0654d9673f | |||
| ff0c413d06 | |||
| f69028963b | |||
| 624d7be17c | |||
| b17adee7cc | |||
| 27c5c3f631 | |||
| 361d2dc87b | |||
| 2499924afc | |||
| b2ff514a71 | |||
| b5cfdde00f | |||
| d99e2767b5 | |||
| 7f8e920fa1 | |||
| c10b834be0 | |||
| f08a567933 | |||
| 85d7859b14 | |||
| d06c53ef0b | |||
| 85cf56fcaf | |||
| a576dfdaa0 | |||
| cf516ab1f4 | |||
| e7feec9146 | |||
| 49caca5700 | |||
| 97f0bac724 | |||
| f8370b6a99 | |||
| d27a994fe6 | |||
| 937c6164d8 | |||
| 4eed2fd710 | |||
| ba41ce7eb0 | |||
| 26b7b461a3 | |||
| 72cda2dc92 | |||
| 2b83cd5599 | |||
| c0ed8cfc5f | |||
| 7646f8187b | |||
| 8d462a83c7 | |||
| 29856c4d1c | |||
| ce626ee0c8 | |||
| 18104468aa | |||
| f136f49e51 | |||
| ee609201b3 | |||
| 25f91e301c | |||
| cbdf9c9562 | |||
| de82d295fb | |||
| 8224009aa6 | |||
| d98f74a9d0 | |||
| 3d49ebdeee | |||
| a515e3e25b | |||
| 0a3cfa4631 | |||
| 15b349604c | |||
| 4a0ab9d184 | |||
| 155373a171 | |||
| a29fcc85d0 | |||
| b6ff09513f | |||
| a0b9dd980b | |||
| 649965e97a | |||
| db57390bf9 | |||
| 4e61ac701f | |||
| cd47fe324e | |||
| d6baa39bf4 | |||
| 6f47e654a8 | |||
| 24047b0eaa | |||
| 0ddbb7d7ad | |||
| 43f25321d7 | |||
| 9678c6a8f1 | |||
| c1401e3e08 | |||
| 5c600d0af0 | |||
| e3d463d511 | |||
| ff2d323309 | |||
| 47a5908430 | |||
| 8448f2bb94 | |||
| cfbefed2e3 | |||
| 3b3f06a727 | |||
| 8c82830af8 | |||
| 9ebb3c6c93 | |||
| 86fba4f43f | |||
| d6325494c7 | |||
| 2021ef37ae | |||
| aae70d93b4 | |||
| 7bc4d6699d | |||
| e4dd0394fe | |||
| 1a65661474 | |||
| 01279edc5b | |||
| 4d713b02f3 | |||
| 4d681021e1 | |||
| 30f53eb668 | |||
| a9593d7589 | |||
| 232fdfbb36 | |||
| 9f9cfaf4be | |||
| 8337b53da3 | |||
| d519139615 | |||
| 8c5a902613 | |||
| f09ac96e06 | |||
| 4ff815e73e | |||
| 2d17c0a70d | |||
| 4e4ffa7f97 | |||
| 9b96c94d5d | |||
| ab38abf661 | |||
| 94565837f1 | |||
| 700328d677 | |||
| 23315dd5ad | |||
| 7945518a47 | |||
| 037e3fc46b | |||
| 8930cb8459 | |||
| f221b12f8d | |||
| 39debaf4b4 | |||
| 8bed8fdcb2 | |||
| 3b1e0efa19 | |||
| cf45a16c67 | |||
| 698fc19e82 | |||
| 3c2d28c763 | |||
| 8d0ecf0adf | |||
| f7b4cc22b9 | |||
| b7983b30d3 | |||
| 1d395287dc | |||
| 634abe4b39 | |||
| 7f70bd2acb | |||
| 417973b1cb | |||
| cd855028d6 | |||
| a24cc728c3 | |||
| 663dc51237 | |||
| 1ad8fbba93 | |||
| f03268e98b | |||
| 778dcf1f26 | |||
| 5ea6fb5089 | |||
| 7e85deb78e | |||
| 8e85d33f74 | |||
| c9fe9ce512 | |||
| 440af51741 | |||
| 03ad03c357 | |||
| 9da680730d | |||
| 56b6b93818 | |||
| c32c809581 |
@@ -0,0 +1,9 @@
|
|||||||
|
# Copy this file to default-environment.env (non-secret defaults) and update values.
|
||||||
|
PROJECT_ROOT=/path/to/docker
|
||||||
|
DOMAIN=example.com
|
||||||
|
TZ=Etc/UTC
|
||||||
|
EMAIL=admin@example.com
|
||||||
|
|
||||||
|
# Required secret file path used by compose services.
|
||||||
|
# Create this file from secrets/.env.secrets.example and keep it out of git.
|
||||||
|
SECRETS_ENV_FILE=${PROJECT_ROOT}/secrets/stack-secrets.env
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
name: Generate Docs
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
schedule:
|
||||||
|
- cron: "0 */6 * * *"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
generate:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
env:
|
||||||
|
DOCKER_HOST: tcp://docker-socket-proxy:2375
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Install docker CLI
|
||||||
|
run: |
|
||||||
|
apt-get update
|
||||||
|
apt-get install -y docker.io docker-compose
|
||||||
|
|
||||||
|
- name: Generate docs
|
||||||
|
run: |
|
||||||
|
scripts/docs/generate-all.sh
|
||||||
|
|
||||||
|
- name: Validate generated docs
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
|
||||||
|
test -s docs/generated/docker-compose.resolved.yml
|
||||||
|
test -s docs/generated/host-topology.md
|
||||||
|
test -s docs/public/physical-topology.svg
|
||||||
|
test -s docs/public/docker-traefik-dynu.svg
|
||||||
|
|
||||||
|
! grep -R "Host inventory JSON not found" docs/public docs/diagrams
|
||||||
|
! grep -R "Generate terraform inventory" docs/public docs/diagrams
|
||||||
|
|
||||||
|
# Ensure no obvious secrets leaked
|
||||||
|
! grep -R -E -i "password|token|api[_-]?key|secret" docs/public \
|
||||||
|
|| (echo "Secret-like string detected"; exit 1)
|
||||||
|
|
||||||
|
- name: Commit changes
|
||||||
|
run: |
|
||||||
|
git config user.name "docs-bot"
|
||||||
|
git config user.email "docs-bot@local"
|
||||||
|
|
||||||
|
git add docs/generated docs/diagrams docs/public data/terraform/proxmox-inventory.json || true
|
||||||
|
|
||||||
|
if git diff --cached --quiet; then
|
||||||
|
echo "No changes to commit"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
git commit -m "docs: regenerate documentation artifacts"
|
||||||
|
|
||||||
|
- name: Push to Gitea
|
||||||
|
run: |
|
||||||
|
git push origin HEAD:main
|
||||||
|
|
||||||
|
- name: Push to GitHub mirror
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_PUSH_TOKEN }}
|
||||||
|
GITHUB_MIRROR_REPO: ${{ vars.GITHUB_MIRROR_REPO }}
|
||||||
|
run: |
|
||||||
|
test -n "$GITHUB_TOKEN"
|
||||||
|
test -n "$GITHUB_MIRROR_REPO"
|
||||||
|
git remote add github "https://$GITHUB_TOKEN@github.com/$GITHUB_MIRROR_REPO.git" || true
|
||||||
|
git push github HEAD:main
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
name: Validate Docs (Gitea)
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [ main ]
|
||||||
|
paths:
|
||||||
|
- "docs/**"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
validate:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Validate docs
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
|
||||||
|
test -d docs/public
|
||||||
|
test -s docs/public/physical-topology.svg
|
||||||
|
|
||||||
|
! grep -R "Host inventory JSON not found" docs/public
|
||||||
|
! grep -R "Generate terraform inventory" docs/public
|
||||||
@@ -0,0 +1,35 @@
|
|||||||
|
name: Validate committed public docs
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
validate-public-docs:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Ensure committed docs/public exists
|
||||||
|
run: |
|
||||||
|
test -d docs/public
|
||||||
|
test -n "$(find docs/public -mindepth 1 -print -quit)"
|
||||||
|
- name: Install MkDocs
|
||||||
|
run: |
|
||||||
|
python3 -m pip install --user mkdocs
|
||||||
|
- name: Validate docs content
|
||||||
|
run: |
|
||||||
|
set -e
|
||||||
|
test -s docs/public/physical-topology.svg
|
||||||
|
test -s docs/public/docker-traefik-dynu.svg
|
||||||
|
! grep -R "Host inventory JSON not found" docs/public
|
||||||
|
! grep -R "Generate terraform inventory" docs/public
|
||||||
|
! rg -n -i "password|token|api[_-]?key|secret" docs/public
|
||||||
|
- name: Build MkDocs site
|
||||||
|
run: |
|
||||||
|
python3 -m mkdocs build -f mkdocs-public.yml --strict
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
name: Publish documentation site
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
pages: write
|
||||||
|
id-token: write
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: github-pages
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Ensure committed docs/public exists
|
||||||
|
run: |
|
||||||
|
test -d docs/public
|
||||||
|
test -n "$(find docs/public -mindepth 1 -print -quit)"
|
||||||
|
|
||||||
|
- name: Install Graphviz
|
||||||
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y graphviz
|
||||||
|
dot -V
|
||||||
|
|
||||||
|
- name: Validate sanitized diagram artifacts
|
||||||
|
run: |
|
||||||
|
test -f docs/public/physical-topology.svg
|
||||||
|
test -f docs/public/docker-traefik-dynu.svg
|
||||||
|
! rg -n "Graphviz dot not found" docs/public/*.svg
|
||||||
|
! rg -n "lan\.ddnsgeek\.com" docs/public/*.svg docs/public/*.md
|
||||||
|
! rg -n -i "password|token|api_key|secret" docs/public/*.svg
|
||||||
|
|
||||||
|
- name: Install MkDocs
|
||||||
|
run: |
|
||||||
|
python3 -m pip install --user mkdocs
|
||||||
|
|
||||||
|
- name: Build public MkDocs site
|
||||||
|
run: |
|
||||||
|
python3 -m mkdocs build -f mkdocs-public.yml --strict
|
||||||
|
|
||||||
|
- name: Verify published content excludes internal/generated docs
|
||||||
|
run: |
|
||||||
|
test -d site-public
|
||||||
|
test ! -e site-public/generated
|
||||||
|
test ! -e site-public/docker
|
||||||
|
|
||||||
|
- name: Verify expected 404-only paths are not generated
|
||||||
|
run: |
|
||||||
|
test ! -e site-public/generated/compose-inventory/index.html
|
||||||
|
test ! -e site-public/generated/prometheus-rules/index.html
|
||||||
|
test ! -e site-public/docker/index.html
|
||||||
|
|
||||||
|
- name: Configure GitHub Pages
|
||||||
|
uses: actions/configure-pages@v5
|
||||||
|
|
||||||
|
- name: Upload GitHub Pages artifact
|
||||||
|
uses: actions/upload-pages-artifact@v3
|
||||||
|
with:
|
||||||
|
path: site-public
|
||||||
|
|
||||||
|
deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
|
||||||
|
environment:
|
||||||
|
name: github-pages
|
||||||
|
url: ${{ steps.deployment.outputs.page_url }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Deploy to GitHub Pages
|
||||||
|
id: deployment
|
||||||
|
uses: actions/deploy-pages@v4
|
||||||
+52
-26
@@ -1,27 +1,53 @@
|
|||||||
# ignore all files and directories
|
#*
|
||||||
*
|
#!**/
|
||||||
# allow git to enter directories
|
#!.gitignore
|
||||||
!*/
|
#!**/Dockerfile
|
||||||
venv/
|
#!docker-compose.yml
|
||||||
core/crowdsec/config
|
**/data/
|
||||||
database
|
apps/gitea/runner-data/
|
||||||
data
|
**/db/
|
||||||
db
|
**/database/
|
||||||
docs
|
apps/nextcloud/config/
|
||||||
examples
|
core/crowdsec/config/
|
||||||
searx
|
**/.env
|
||||||
tests
|
!monitoring/node-red/data/
|
||||||
utils
|
apps/stockfill/
|
||||||
stockfill
|
apps/shift-recorder/
|
||||||
shift-recorder
|
|
||||||
# keep essential project files
|
|
||||||
!.gitignore
|
|
||||||
!.gitattributes
|
|
||||||
|
|
||||||
# allow YAMLs, shell scripts, and others
|
!apps/searxng/
|
||||||
!*.yml
|
apps/searxng/*
|
||||||
!*.yaml
|
|
||||||
!*.sh
|
!apps/searxng/Dockerfile
|
||||||
!*.py
|
!apps/searxng/docker-compose.yml
|
||||||
!*.Dockerfile
|
venv/
|
||||||
!Dockerfile
|
core/authelia/users_database.yml
|
||||||
|
monitoring/influxdb/*
|
||||||
|
!monitoring/influxdb/docker-compose.yml
|
||||||
|
secrets/*
|
||||||
|
!secrets/.env.secrets.example
|
||||||
|
!secrets/inventory.json
|
||||||
|
!.env.example
|
||||||
|
core/traefik/certs/*
|
||||||
|
!core/traefik/certs/.gitkeep
|
||||||
|
site/
|
||||||
|
|
||||||
|
# Docs generation artifacts intentionally tracked
|
||||||
|
!data/terraform/proxmox-inventory.json
|
||||||
|
!infrastructure/terraform/dynu/generated/
|
||||||
|
!infrastructure/terraform/dynu/generated/dynu_dns_records_inventory.json
|
||||||
|
!docs/generated/
|
||||||
|
!docs/generated/docker-compose.resolved.yml
|
||||||
|
!docs/generated/host-topology.md
|
||||||
|
!docs/diagrams/
|
||||||
|
!docs/diagrams/*.svg
|
||||||
|
!docs/public/
|
||||||
|
!docs/public/*.md
|
||||||
|
!docs/public/*.svg
|
||||||
|
|
||||||
|
# Terraform local/state artifacts
|
||||||
|
**/.terraform/
|
||||||
|
**/.terraform.lock.hcl
|
||||||
|
*.tfstate
|
||||||
|
*.tfstate.*
|
||||||
|
*.tfvars
|
||||||
|
*.tfvars.json
|
||||||
|
|||||||
@@ -0,0 +1,73 @@
|
|||||||
|
# Git internals accidentally nested in subprojects
|
||||||
|
# Gramps persistent data and postgres cluster
|
||||||
|
apps/gramps/data/
|
||||||
|
apps/gramps/db/
|
||||||
|
|
||||||
|
# Nextcloud persistent data
|
||||||
|
apps/nextcloud/data/
|
||||||
|
apps/nextcloud/database/
|
||||||
|
apps/nextcloud/config/
|
||||||
|
|
||||||
|
# Passbolt secrets and database
|
||||||
|
apps/passbolt/data/
|
||||||
|
|
||||||
|
# Android / Gradle build artifacts
|
||||||
|
apps/shift-recorder/.gradle/
|
||||||
|
apps/shift-recorder/app/build/
|
||||||
|
apps/shift-recorder/build/
|
||||||
|
apps/shift-recorder/app-release-*.apk
|
||||||
|
apps/shift-recorder/app-release-*.aab
|
||||||
|
apps/shift-recorder/*.idsig
|
||||||
|
apps/shift-recorder/android.keystore
|
||||||
|
apps/shift-recorder/manifest-checksum.txt
|
||||||
|
|
||||||
|
apps/stockfill/.gradle/
|
||||||
|
apps/stockfill/app/build/
|
||||||
|
apps/stockfill/build/
|
||||||
|
apps/stockfill/app-release-*.apk
|
||||||
|
apps/stockfill/app-release-*.aab
|
||||||
|
apps/stockfill/*.idsig
|
||||||
|
apps/stockfill/android.keystore
|
||||||
|
apps/stockfill/manifest-checksum.txt
|
||||||
|
|
||||||
|
# Node / frontend artifacts
|
||||||
|
**/node_modules/
|
||||||
|
**/dist/
|
||||||
|
**/.vite/
|
||||||
|
**/coverage/
|
||||||
|
**/playwright-report/
|
||||||
|
**/test-results/
|
||||||
|
|
||||||
|
# Python artifacts
|
||||||
|
**/__pycache__/
|
||||||
|
**/*.pyc
|
||||||
|
|
||||||
|
# IDE files
|
||||||
|
**/.vscode/
|
||||||
|
apps/shift-recorder/
|
||||||
|
#!apps/shift-recorder/.vscode/launch.json
|
||||||
|
apps/stockfill/
|
||||||
|
#!apps/stockfill/.vscode/launch.json
|
||||||
|
#!apps/stockfill/.vscode/settings.json
|
||||||
|
|
||||||
|
# Traefik data
|
||||||
|
core/traefik/data/
|
||||||
|
|
||||||
|
# Authelia sqlite database and notifications
|
||||||
|
core/authelia/data/
|
||||||
|
|
||||||
|
# Crowdsec generated state
|
||||||
|
core/crowdsec/data/
|
||||||
|
core/crowdsec/logs/
|
||||||
|
core/crowdsec/config
|
||||||
|
# Archived service data
|
||||||
|
archive/esphome/data/
|
||||||
|
|
||||||
|
monitoring/gotify/data/
|
||||||
|
monitoring/grafana/data/
|
||||||
|
monitoring/portainer/data/
|
||||||
|
apps/gitea/data/
|
||||||
|
apps/searxng/
|
||||||
|
monitoring/prometheus/data/
|
||||||
|
monitoring/uptime-kuma/data/
|
||||||
|
# Keep compose files, Dockerfiles, configs, scripts, and documentation tracked
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
# ignore all files and directories
|
||||||
|
*
|
||||||
|
# allow git to enter directories
|
||||||
|
!*/
|
||||||
|
venv/
|
||||||
|
core/crowdsec/config
|
||||||
|
database
|
||||||
|
data
|
||||||
|
db
|
||||||
|
docs
|
||||||
|
examples
|
||||||
|
searx
|
||||||
|
tests
|
||||||
|
utils
|
||||||
|
stockfill
|
||||||
|
shift-recorder
|
||||||
|
# keep essential project files
|
||||||
|
!.gitignore
|
||||||
|
!.gitattributes
|
||||||
|
|
||||||
|
# allow YAMLs, shell scripts, and others
|
||||||
|
!*.yml
|
||||||
|
!*.yaml
|
||||||
|
!*.sh
|
||||||
|
!*.py
|
||||||
|
!*.Dockerfile
|
||||||
|
!Dockerfile
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
# Codex instructions for this repository
|
||||||
|
|
||||||
|
This repository contains:
|
||||||
|
- Docker Compose infrastructure
|
||||||
|
- Terraform configuration
|
||||||
|
- Ansible configuration
|
||||||
|
|
||||||
|
## General rules
|
||||||
|
|
||||||
|
Prefer validation and linting over execution.
|
||||||
|
Do not make assumptions about runtime access.
|
||||||
|
Do not run destructive commands.
|
||||||
|
Do not install repo changes unless explicitly requested.
|
||||||
|
|
||||||
|
## Docker / Compose rules
|
||||||
|
|
||||||
|
This environment does not have Docker daemon access.
|
||||||
|
Do not use commands that require `/var/run/docker.sock`.
|
||||||
|
|
||||||
|
Allowed:
|
||||||
|
- `docker compose config`
|
||||||
|
- `docker compose -f <file> config`
|
||||||
|
- `./services-up.sh --profile all config`
|
||||||
|
|
||||||
|
Not allowed:
|
||||||
|
- `docker compose up`
|
||||||
|
- `docker compose down`
|
||||||
|
- `docker compose run`
|
||||||
|
- `docker compose exec`
|
||||||
|
- `docker build`
|
||||||
|
- `docker pull`
|
||||||
|
|
||||||
|
When validating Docker changes:
|
||||||
|
1. Prefer `./services-up.sh --profile all config` if available.
|
||||||
|
2. If that does not fit the task, use `docker compose -f ... config`.
|
||||||
|
3. Only create temporary placeholder env files if validation requires them.
|
||||||
|
4. Do not commit placeholder env files unless explicitly requested.
|
||||||
|
|
||||||
|
## Terraform rules
|
||||||
|
|
||||||
|
Allowed:
|
||||||
|
- `terraform fmt -check -recursive`
|
||||||
|
- `terraform init -backend=false -input=false`
|
||||||
|
- `terraform validate`
|
||||||
|
- `tflint`
|
||||||
|
|
||||||
|
Do not apply infrastructure changes unless explicitly requested.
|
||||||
|
Do not run:
|
||||||
|
- `terraform apply`
|
||||||
|
- `terraform destroy`
|
||||||
|
|
||||||
|
If `terraform init` fails because access to `registry.terraform.io` is forbidden, do not summarize the error vaguely. Report the exact stderr. Continue with:
|
||||||
|
- `terraform fmt -check -recursive`
|
||||||
|
- static review of changed `.tf` files
|
||||||
|
|
||||||
|
Only run `terraform validate` when provider installation is available locally or registry access succeeds.
|
||||||
|
|
||||||
|
## Ansible rules
|
||||||
|
|
||||||
|
Allowed:
|
||||||
|
- `ansible-lint`
|
||||||
|
- `ansible-playbook --syntax-check <playbook>`
|
||||||
|
|
||||||
|
Do not run playbooks against real hosts unless explicitly requested.
|
||||||
|
|
||||||
|
## Shell / YAML rules
|
||||||
|
|
||||||
|
Allowed:
|
||||||
|
- `shellcheck`
|
||||||
|
- `yamllint`
|
||||||
|
- `yq`
|
||||||
|
- `jq`
|
||||||
|
|
||||||
|
## Expected workflow
|
||||||
|
|
||||||
|
When making changes:
|
||||||
|
1. Edit the smallest necessary set of files.
|
||||||
|
2. Run the safest available validation commands.
|
||||||
|
3. Report validation results clearly.
|
||||||
|
4. If validation is blocked by missing secrets, env files, or remote/provider access, say so explicitly instead of guessing.
|
||||||
@@ -0,0 +1,188 @@
|
|||||||
|
# Homelab Docker + Terraform Inventory Repository
|
||||||
|
|
||||||
|
This repository is both:
|
||||||
|
|
||||||
|
1. **operational** (Docker Compose application/runtime definition), and
|
||||||
|
2. **documentary/inventory-oriented** (Terraform capture of Proxmox VMs, host metadata, and selected Docker objects).
|
||||||
|
|
||||||
|
If you only read one section, read **[Source-of-truth boundaries](docs/source-of-truth.md)** first.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick navigation
|
||||||
|
|
||||||
|
- Architecture overview: [docs/architecture.md](docs/architecture.md)
|
||||||
|
- Repository layout: [docs/repo-structure.md](docs/repo-structure.md)
|
||||||
|
- Source-of-truth boundaries and guardrails: [docs/source-of-truth.md](docs/source-of-truth.md)
|
||||||
|
- Docker environment composition and `services-up.sh`: [docs/docker-environment.md](docs/docker-environment.md)
|
||||||
|
- Terraform workflows (brownfield import/reconciliation): [docs/terraform-workflows.md](docs/terraform-workflows.md)
|
||||||
|
- Infrastructure inventory intent and outputs: [docs/infrastructure-inventory.md](docs/infrastructure-inventory.md)
|
||||||
|
- Dynu DNS read-only inventory workflow: [docs/dynu-dns-inventory.md](docs/dynu-dns-inventory.md)
|
||||||
|
- Generated host topology doc: [docs/generated/host-topology.md](docs/generated/host-topology.md)
|
||||||
|
- Ansible bootstrap workflows: [docs/ansible-workflows.md](docs/ansible-workflows.md)
|
||||||
|
- Deployment prerequisites and secrets setup: [docs/deployment-prerequisites.md](docs/deployment-prerequisites.md)
|
||||||
|
- Secrets inventory: [docs/security-secrets.md](docs/security-secrets.md)
|
||||||
|
|
||||||
|
Codex helper scripts:
|
||||||
|
|
||||||
|
- Initial Codex environment/bootstrap setup: [scripts/codex-setup.sh](scripts/codex-setup.sh)
|
||||||
|
- Codex environment maintenance/refresh: [scripts/codex-maintenance.sh](scripts/codex-maintenance.sh)
|
||||||
|
|
||||||
|
Infrastructure subtrees:
|
||||||
|
|
||||||
|
- Ansible foundation docs: [infrastructure/ansible/README.md](infrastructure/ansible/README.md)
|
||||||
|
- Terraform root docs: [infrastructure/terraform/README.md](infrastructure/terraform/README.md)
|
||||||
|
- Terraform Docker mirror: [infrastructure/terraform/docker/README.md](infrastructure/terraform/docker/README.md)
|
||||||
|
- Terraform Proxmox inventory: [infrastructure/terraform/proxmox/README.md](infrastructure/terraform/proxmox/README.md)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Operating model
|
||||||
|
|
||||||
|
### Docker Compose (runtime authority)
|
||||||
|
|
||||||
|
- Compose files under `core/`, `apps/`, and `monitoring/` describe runtime services.
|
||||||
|
- `services-up.sh` composes the environment by discovering compose files and applying common env/network inputs.
|
||||||
|
- For service runtime behavior, start from Compose files and `services-up.sh` (not Terraform).
|
||||||
|
|
||||||
|
|
||||||
|
### Ansible (bootstrap foundation)
|
||||||
|
|
||||||
|
- Ansible under `infrastructure/ansible/` is a phase-1 foundation for inventory/configuration scaffolding.
|
||||||
|
- It supports safe validation (inventory parsing and playbook syntax checks) while hosts/devices are onboarded gradually.
|
||||||
|
- It does not replace Compose runtime authority or Terraform reconciliation authority at this stage.
|
||||||
|
|
||||||
|
### Terraform (inventory and reconciliation authority)
|
||||||
|
|
||||||
|
- Terraform under `infrastructure/terraform/` is used to codify and reconcile existing infrastructure.
|
||||||
|
- Current repo usage emphasizes **brownfield import-first workflows** and safe reconciliation.
|
||||||
|
- Terraform captures:
|
||||||
|
- Proxmox VM configuration for existing VMs.
|
||||||
|
- Physical host metadata in locals/outputs.
|
||||||
|
- Documentation-oriented Docker container mirroring (limited, selective).
|
||||||
|
|
||||||
|
Terraform here is **not** a replacement for Docker Compose deployment.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Guardrails
|
||||||
|
|
||||||
|
- Do not run destructive Terraform commands casually.
|
||||||
|
- Do not treat generated Terraform config as final without manual review.
|
||||||
|
- Do not commit real secrets, credentials, or local state.
|
||||||
|
- Keep one-resource-per-file patterns where already established in Terraform subdirectories.
|
||||||
|
- Prefer shaping outputs for documentation/tooling consumption over dumping raw provider objects.
|
||||||
|
|
||||||
|
See [docs/source-of-truth.md](docs/source-of-truth.md) and [docs/terraform-workflows.md](docs/terraform-workflows.md) for concrete do/don't guidance.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## High-level architecture
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TB
|
||||||
|
Internet((Internet Clients)) -->|HTTPS 443 / HTTP 80| Traefik[Traefik Ingress\nACME TLS + Security Middlewares]
|
||||||
|
|
||||||
|
subgraph DockerHost[Primary Docker Host]
|
||||||
|
Traefik
|
||||||
|
Authelia[Authelia SSO / ForwardAuth]
|
||||||
|
CrowdSec[CrowdSec + Traefik Bouncer]
|
||||||
|
ErrPages[Error Pages Fallback]
|
||||||
|
|
||||||
|
subgraph Apps[Business / User Applications]
|
||||||
|
Nextcloud[Nextcloud]
|
||||||
|
Passbolt[Passbolt]
|
||||||
|
Gitea[Gitea]
|
||||||
|
FamilyTree[Gramps Web]
|
||||||
|
Searxng[SearXNG]
|
||||||
|
end
|
||||||
|
|
||||||
|
subgraph Ops[Operations & Monitoring]
|
||||||
|
Grafana[Grafana]
|
||||||
|
Prometheus[Prometheus]
|
||||||
|
InfluxDB[InfluxDB]
|
||||||
|
NodeRED[Node-RED]
|
||||||
|
Portainer[Portainer]
|
||||||
|
UptimeKuma[Uptime Kuma]
|
||||||
|
Gotify[Gotify Notifications]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
Traefik --> Apps
|
||||||
|
Traefik --> Ops
|
||||||
|
Traefik -->|ForwardAuth for selected routes| Authelia
|
||||||
|
Traefik -->|Threat decisions| CrowdSec
|
||||||
|
Traefik -->|4xx/5xx fallback| ErrPages
|
||||||
|
|
||||||
|
Prometheus --> Grafana
|
||||||
|
Prometheus --> Gotify
|
||||||
|
```
|
||||||
|
|
||||||
|
For request-flow and network detail, see [docs/architecture.md](docs/architecture.md).
|
||||||
|
|
||||||
|
## Public docs publication workflow
|
||||||
|
|
||||||
|
Public docs are generated on the Docker host and committed to this repository. GitHub Actions only publishes committed content from `docs/public`.
|
||||||
|
|
||||||
|
1. Generate public docs locally from the repository root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./scripts/generate-public-docs.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Inspect the generated changes:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git diff -- docs/public docs/generated docs/diagrams
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Commit the generated public docs (and any supporting generated files you intend to version):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git add docs/public docs/generated docs/diagrams
|
||||||
|
git commit -m "docs: regenerate public docs"
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Push your branch:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git push
|
||||||
|
```
|
||||||
|
|
||||||
|
Only files under `docs/public` are published by GitHub Pages. Internal/generated documentation is not published unless it is deliberately copied/sanitized into `docs/public`.
|
||||||
|
|
||||||
|
### Regenerating architecture docs (Prometheus + Dynu DNS)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Refresh Dynu live inventory and generated resources/import helpers
|
||||||
|
cd infrastructure/terraform/dynu
|
||||||
|
terraform apply -refresh-only
|
||||||
|
python3 scripts/generate-brownfield-records.py --overwrite
|
||||||
|
|
||||||
|
# Regenerate architecture docs from Prometheus + Dynu inventory
|
||||||
|
cd ../../..
|
||||||
|
python3 scripts/render_prometheus_docs.py \
|
||||||
|
--inventory-file docs/runtime/prometheus-inventory.json \
|
||||||
|
--dynu-dns-inventory-file infrastructure/terraform/dynu/generated/dynu_dns_records_inventory.json
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Codex setup and maintenance scripts
|
||||||
|
|
||||||
|
The repository includes helper scripts for Codex sessions that need local tooling and safe placeholder secret material for validation-only workflows:
|
||||||
|
|
||||||
|
- `scripts/codex-setup.sh`
|
||||||
|
- Installs baseline CLI dependencies (shell/yaml/terraform/ansible tooling).
|
||||||
|
- Prepares `secrets/stack-secrets.env` from templates and creates dummy file-based secret placeholders based on `secrets/inventory.json`.
|
||||||
|
- Installs/refreshed baseline Ansible collections when `infrastructure/ansible/collections/requirements.yml` is present.
|
||||||
|
- Runs safe Ansible bootstrap checks (version, inventory parse, playbook syntax check) without live connectivity operations.
|
||||||
|
- Prints installed tool versions for quick verification.
|
||||||
|
|
||||||
|
- `scripts/codex-maintenance.sh`
|
||||||
|
- Refreshes Python-based linting/automation tooling.
|
||||||
|
- Reconciles placeholder secret files against current `secrets/inventory.json` (creates missing, removes stale).
|
||||||
|
- Rebuilds `secrets/stack-secrets.env` with dummy values for compose-config validation.
|
||||||
|
- Refreshes Ansible collections and repeats safe inventory/syntax validation checks.
|
||||||
|
|
||||||
|
Both scripts are intended for local validation environments and should not be treated as production provisioning automation.
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
# Gitea
|
||||||
|
|
||||||
|
## Gitea Actions
|
||||||
|
|
||||||
|
Gitea Actions is enabled by setting:
|
||||||
|
|
||||||
|
- `GITEA__actions__ENABLED=true`
|
||||||
|
|
||||||
|
## Runner service
|
||||||
|
|
||||||
|
The repository includes a dedicated Gitea Actions runner service named:
|
||||||
|
|
||||||
|
- `gitea-runner`
|
||||||
|
|
||||||
|
The runner uses Docker through the existing Docker socket proxy:
|
||||||
|
|
||||||
|
- `DOCKER_HOST=tcp://docker-socket-proxy:2375`
|
||||||
|
|
||||||
|
The runner intentionally **does not** mount:
|
||||||
|
|
||||||
|
- `/var/run/docker.sock`
|
||||||
|
|
||||||
|
## Registration token
|
||||||
|
|
||||||
|
Generate a runner registration token from the Gitea UI:
|
||||||
|
|
||||||
|
- Site Administration → Actions → Runners
|
||||||
|
- or Repo → Settings → Actions → Runners
|
||||||
|
|
||||||
|
Put the token in your env/secrets file:
|
||||||
|
|
||||||
|
- `GITEA_RUNNER_REGISTRATION_TOKEN=...`
|
||||||
|
|
||||||
|
## Start the runner
|
||||||
|
|
||||||
|
- `./services-up.sh --profile gitea up -d gitea-runner`
|
||||||
|
- or `./services-up.sh --profile all up -d gitea-runner`
|
||||||
|
|
||||||
|
## Logs
|
||||||
|
|
||||||
|
- `docker logs -f gitea-runner`
|
||||||
|
|
||||||
|
## Labels
|
||||||
|
|
||||||
|
Common workflow label:
|
||||||
|
|
||||||
|
- `runs-on: ubuntu-latest`
|
||||||
|
|
||||||
|
This should match the configured labels, for example:
|
||||||
|
|
||||||
|
- `GITEA_RUNNER_LABELS=ubuntu-latest:docker://node:20-bookworm,...`
|
||||||
|
|
||||||
|
## Security note
|
||||||
|
|
||||||
|
The runner can control Docker through `docker-socket-proxy`. This is safer than mounting the raw Docker socket directly, but workflows still have meaningful control over Docker. Only trusted repositories/users should be allowed to run workflows on this runner.
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 8af5ca464225c888f5438a0fd226937e2ccabca4 Gitea <gitea@fake.local> 1757451832 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 bb76e891d59a88beeb4f5b233cbecfd94a8f0cae Gitea <gitea@fake.local> 1756461833 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 f5998826f6b9ed820f5a97a67ce11402e72fd2f3 Gitea <gitea@fake.local> 1756461833 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 791c61ee8129113a02d67ffa171ed48843cdf025 Gitea <gitea@fake.local> 1756815232 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 e76063242b47225e6614f17749b1bf2ba0b70ac9 Gitea <gitea@fake.local> 1757407432 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 1c93f2ab9ceef7601c5db93de0ddb057aef5b4b1 Gitea <gitea@fake.local> 1756433632 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 0404d47f0ba21269865f307f3fd53e746ff155a4 Gitea <gitea@fake.local> 1756436033 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 80111cd7579abc6319f5d357da060db8186babaf Gitea <gitea@fake.local> 1758786607 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 0fea5ebd8be8d93f95630bdc5cc9ecc0b0bbac43 Gitea <gitea@fake.local> 1756949032 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 7b87a0e2a6c03e5344da2fe6a391c1f1fb269b5c Gitea <gitea@fake.local> 1756851832 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0000000000000000000000000000000000000000 bde15e4b736be753b1272a3277f528d7eb75d371 Gitea <gitea@fake.local> 1756508033 +0000 fetch --prune --tags origin: storing head
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
8af5ca464225c888f5438a0fd226937e2ccabca4
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
bb76e891d59a88beeb4f5b233cbecfd94a8f0cae
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
f5998826f6b9ed820f5a97a67ce11402e72fd2f3
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
791c61ee8129113a02d67ffa171ed48843cdf025
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
e76063242b47225e6614f17749b1bf2ba0b70ac9
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
1c93f2ab9ceef7601c5db93de0ddb057aef5b4b1
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0404d47f0ba21269865f307f3fd53e746ff155a4
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
80111cd7579abc6319f5d357da060db8186babaf
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
0fea5ebd8be8d93f95630bdc5cc9ecc0b0bbac43
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
7b87a0e2a6c03e5344da2fe6a391c1f1fb269b5c
|
|
||||||
-1
@@ -1 +0,0 @@
|
|||||||
bde15e4b736be753b1272a3277f528d7eb75d371
|
|
||||||
@@ -5,10 +5,11 @@ services:
|
|||||||
image: gitea/gitea:latest # change to 1-rootless once find out how to move data.
|
image: gitea/gitea:latest # change to 1-rootless once find out how to move data.
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
- USER_UID=1000
|
- USER_UID=${GITEA_USER_UID}
|
||||||
- USER_GID=1000
|
- USER_GID=${GITEA_USER_GID}
|
||||||
- GITEA__database__DB_TYPE=sqlite3
|
- GITEA__database__DB_TYPE=${GITEA_DB_TYPE}
|
||||||
- GITEA__server__ROOT_URL=https://gitea.lan.ddnsgeek.com/
|
- GITEA__server__ROOT_URL=${GITEA_ROOT_URL}
|
||||||
|
- GITEA__actions__ENABLED=true
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/gitea/data:/data
|
- ${PROJECT_ROOT}/apps/gitea/data:/data
|
||||||
networks:
|
networks:
|
||||||
@@ -31,6 +32,25 @@ services:
|
|||||||
retries: 6
|
retries: 6
|
||||||
start_period: 120s
|
start_period: 120s
|
||||||
|
|
||||||
|
gitea-runner:
|
||||||
|
profiles: ["apps","all","gitea","ci"]
|
||||||
|
container_name: gitea-runner
|
||||||
|
image: gitea/act_runner:latest
|
||||||
|
restart: always
|
||||||
|
depends_on:
|
||||||
|
- gitea
|
||||||
|
- docker-socket-proxy
|
||||||
|
environment:
|
||||||
|
- GITEA_INSTANCE_URL=${GITEA_ROOT_URL}
|
||||||
|
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}
|
||||||
|
- GITEA_RUNNER_NAME=${GITEA_RUNNER_NAME}
|
||||||
|
- GITEA_RUNNER_LABELS=${GITEA_RUNNER_LABELS}
|
||||||
|
- DOCKER_HOST=${DOCKER_SOCKET_PROXY_HOST}
|
||||||
|
volumes:
|
||||||
|
- ${PROJECT_ROOT}/apps/gitea/runner-data:/data
|
||||||
|
networks:
|
||||||
|
- traefik
|
||||||
|
|
||||||
#volumes:
|
#volumes:
|
||||||
# gitea_data:
|
# gitea_data:
|
||||||
|
|
||||||
|
|||||||
@@ -1,47 +1,39 @@
|
|||||||
services:
|
services:
|
||||||
gramps-db:
|
|
||||||
profiles: ["apps","all","gramps"]
|
|
||||||
image: postgres:13
|
|
||||||
container_name: gramps-db
|
|
||||||
restart: always
|
|
||||||
environment:
|
|
||||||
POSTGRES_USER: gramps
|
|
||||||
POSTGRES_PASSWORD: grampspassword
|
|
||||||
POSTGRES_DB: gramps
|
|
||||||
volumes:
|
|
||||||
- ${PROJECT_ROOT}/apps/gramps/db:/var/lib/postgresql
|
|
||||||
networks:
|
|
||||||
- gramps
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -h db -p 5432 -U gramps -d gramps"]
|
|
||||||
interval: 10s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 12
|
|
||||||
start_period: 30s
|
|
||||||
|
|
||||||
|
|
||||||
grampsweb:
|
grampsweb:
|
||||||
profiles: ["apps","all","gramps"]
|
profiles: ["apps","all","gramps"]
|
||||||
image: ghcr.io/gramps-project/grampsweb:latest
|
image: ghcr.io/gramps-project/grampsweb:latest
|
||||||
container_name: gramps-web
|
container_name: gramps-web
|
||||||
depends_on:
|
depends_on:
|
||||||
- gramps-db
|
- gramps-redis
|
||||||
|
- grampsweb_celery
|
||||||
restart: always
|
restart: always
|
||||||
# ports:
|
# env_file:
|
||||||
# - "5000:5000" # access via http://localhost:5000
|
# - ${SECRETS_ENV_FILE}
|
||||||
environment:
|
environment:
|
||||||
DB_URI: postgresql://gramps:grampspassword@db:5432/gramps
|
TZ: ${TZ}
|
||||||
GRAMPSWEB_LOGLEVEL: INFO
|
GRAMPSWEB_TREE: ${GRAMPSWEB_TREE}
|
||||||
# default admin user created on first run:
|
GRAMPSWEB_BASE_URL: ${GRAMPSWEB_BASE_URL}
|
||||||
INITIAL_ADMIN: admin
|
GRAMPSWEB_SECRET_KEY: ${GRAMPSWEB_SECRET_KEY}
|
||||||
INITIAL_ADMIN_PASSWORD: admin
|
GRAMPSWEB_REGISTRATION_DISABLED: ${GRAMPSWEB_REGISTRATION_DISABLED}
|
||||||
# optional: storage paths inside container
|
GRAMPSWEB_EMAIL_HOST: ${GRAMPSWEB_EMAIL_HOST}
|
||||||
GRAMPSWEB_MEDIAPATH: /app/media
|
GRAMPSWEB_EMAIL_PORT: ${GRAMPSWEB_EMAIL_PORT}
|
||||||
GRAMPSWEB_TREE: "main"
|
GRAMPSWEB_EMAIL_HOST_USER: ${GRAMPSWEB_EMAIL_HOST_USER}
|
||||||
|
GRAMPSWEB_EMAIL_HOST_PASSWORD: ${GRAMPSWEB_EMAIL_HOST_PASSWORD}
|
||||||
|
GRAMPSWEB_EMAIL_USE_SSL: ${GRAMPSWEB_EMAIL_USE_SSL}
|
||||||
|
GRAMPSWEB_EMAIL_USE_STARTTLS: ${GRAMPSWEB_EMAIL_USE_STARTTLS}
|
||||||
|
GRAMPSWEB_DEFAULT_FROM_EMAIL: ${GRAMPSWEB_DEFAULT_FROM_EMAIL}
|
||||||
|
GRAMPSWEB_CELERY_CONFIG__broker_url: redis://gramps-redis:6379/0
|
||||||
|
GRAMPSWEB_CELERY_CONFIG__result_backend: redis://gramps-redis:6379/0
|
||||||
|
GRAMPSWEB_RATELIMIT_STORAGE_URI: redis://gramps-redis:6379/1
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/gramps/data/users:/app/users
|
- ${PROJECT_ROOT}/apps/gramps/data/users:/app/users
|
||||||
- ${PROJECT_ROOT}/apps/gramps/data/media:/app/media
|
- ${PROJECT_ROOT}/apps/gramps/data/index:/app/indexdir
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/thumbnail_cache:/app/thumbnail_cache
|
||||||
- ${PROJECT_ROOT}/apps/gramps/data/cache:/app/cache
|
- ${PROJECT_ROOT}/apps/gramps/data/cache:/app/cache
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/secret:/app/secret
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/db:/root/.gramps/grampsdb
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/media:/app/media
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/tmp:/tmp
|
||||||
labels:
|
labels:
|
||||||
- "traefik.http.routers.gramps.rule=Host(`familytree.lan.ddnsgeek.com`)"
|
- "traefik.http.routers.gramps.rule=Host(`familytree.lan.ddnsgeek.com`)"
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
@@ -62,10 +54,67 @@ services:
|
|||||||
retries: 6
|
retries: 6
|
||||||
start_period: 60s
|
start_period: 60s
|
||||||
|
|
||||||
|
grampsweb_celery:
|
||||||
|
profiles: ["apps","all","gramps"]
|
||||||
|
image: ghcr.io/gramps-project/grampsweb:latest
|
||||||
|
container_name: gramps-web-celery
|
||||||
|
command: celery -A gramps_webapi.celery worker --loglevel=INFO --concurrency=2
|
||||||
|
depends_on:
|
||||||
|
- gramps-redis
|
||||||
|
restart: always
|
||||||
|
# env_file:
|
||||||
|
# - ${SECRETS_ENV_FILE}
|
||||||
|
environment:
|
||||||
|
TZ: ${TZ}
|
||||||
|
GRAMPSWEB_TREE: ${GRAMPSWEB_TREE}
|
||||||
|
GRAMPSWEB_BASE_URL: ${GRAMPSWEB_BASE_URL}
|
||||||
|
GRAMPSWEB_SECRET_KEY: ${GRAMPSWEB_SECRET_KEY}
|
||||||
|
GRAMPSWEB_REGISTRATION_DISABLED: ${GRAMPSWEB_REGISTRATION_DISABLED}
|
||||||
|
GRAMPSWEB_EMAIL_HOST: ${GRAMPSWEB_EMAIL_HOST}
|
||||||
|
GRAMPSWEB_EMAIL_PORT: ${GRAMPSWEB_EMAIL_PORT}
|
||||||
|
GRAMPSWEB_EMAIL_HOST_USER: ${GRAMPSWEB_EMAIL_HOST_USER}
|
||||||
|
GRAMPSWEB_EMAIL_HOST_PASSWORD: ${GRAMPSWEB_EMAIL_HOST_PASSWORD}
|
||||||
|
GRAMPSWEB_EMAIL_USE_SSL: ${GRAMPSWEB_EMAIL_USE_SSL}
|
||||||
|
GRAMPSWEB_EMAIL_USE_STARTTLS: ${GRAMPSWEB_EMAIL_USE_STARTTLS}
|
||||||
|
GRAMPSWEB_DEFAULT_FROM_EMAIL: ${GRAMPSWEB_DEFAULT_FROM_EMAIL}
|
||||||
|
GRAMPSWEB_CELERY_CONFIG__broker_url: redis://gramps-redis:6379/0
|
||||||
|
GRAMPSWEB_CELERY_CONFIG__result_backend: redis://gramps-redis:6379/0
|
||||||
|
GRAMPSWEB_RATELIMIT_STORAGE_URI: redis://gramps-redis:6379/1
|
||||||
|
volumes:
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/users:/app/users
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/index:/app/indexdir
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/thumbnail_cache:/app/thumbnail_cache
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/cache:/app/cache
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/secret:/app/secret
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/db:/root/.gramps/grampsdb
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/media:/app/media
|
||||||
|
- ${PROJECT_ROOT}/apps/gramps/data/tmp:/tmp
|
||||||
|
networks:
|
||||||
|
- gramps
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
- CMD-SHELL
|
||||||
|
- pgrep -f "celery.*gramps_webapi.celery.*worker" >/dev/null
|
||||||
|
interval: 30s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 6
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
gramps-redis:
|
||||||
|
profiles: ["apps","all","gramps"]
|
||||||
|
image: valkey/valkey:8-alpine
|
||||||
|
container_name: gramps-redis
|
||||||
|
restart: always
|
||||||
|
networks:
|
||||||
|
- gramps
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
- CMD-SHELL
|
||||||
|
- valkey-cli -h 127.0.0.1 -p 6379 ping | grep -q PONG
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 6
|
||||||
|
start_period: 10s
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
# traefik_reverse_proxy:
|
|
||||||
# external: true
|
|
||||||
gramps:
|
gramps:
|
||||||
# driver: bridge
|
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
services:
|
services:
|
||||||
nextcloud-webapp:
|
nextcloud-webapp:
|
||||||
# image: nextcloud:production
|
|
||||||
profiles: ["apps","all","nextcloud"]
|
profiles: ["apps","all","nextcloud"]
|
||||||
build:
|
build:
|
||||||
context: ${PROJECT_ROOT}/apps/nextcloud
|
context: ${PROJECT_ROOT}/apps/nextcloud
|
||||||
container_name: nextcloud-webapp
|
container_name: nextcloud-webapp
|
||||||
restart: always
|
restart: always
|
||||||
hostname: nextcloud.lan.ddnsgeek.com
|
hostname: ${NEXTCLOUD_TRUSTED_DOMAINS}
|
||||||
|
# env_file:
|
||||||
|
# - ${SECRETS_ENV_FILE}
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/nextcloud/data:/var/www/html/data:rw
|
- ${PROJECT_ROOT}/apps/nextcloud/data:/var/www/html/data:rw
|
||||||
- ${PROJECT_ROOT}/apps/nextcloud/config:/var/www/html/config:rw
|
- ${PROJECT_ROOT}/apps/nextcloud/config:/var/www/html/config:rw
|
||||||
@@ -16,31 +17,33 @@ services:
|
|||||||
- nextcloud-db
|
- nextcloud-db
|
||||||
- nextcloud-redis
|
- nextcloud-redis
|
||||||
environment:
|
environment:
|
||||||
- MYSQL_PASSWORD=R1m@dmin
|
- MYSQL_PASSWORD_FILE=/run/secrets/nextcloud_db_password
|
||||||
- MYSQL_DATABASE=nextcloud
|
- MYSQL_DATABASE=${NEXTCLOUD_MYSQL_DATABASE}
|
||||||
- MYSQL_USER=nextcloud
|
- MYSQL_USER=${NEXTCLOUD_DB_USER}
|
||||||
- MYSQL_HOST=nextcloud_db:3306
|
- MYSQL_HOST=${NEXTCLOUD_MYSQL_HOST}
|
||||||
- NEXTCLOUD_TRUSTED_DOMAINS=nextcloud.lan.ddnsgeek.com
|
- NEXTCLOUD_TRUSTED_DOMAINS=${NEXTCLOUD_TRUSTED_DOMAINS}
|
||||||
- OVERWRITEPROTOCOL=https
|
- OVERWRITEPROTOCOL=${NEXTCLOUD_OVERWRITEPROTOCOL}
|
||||||
- OVERWRITECLIURL=https://nextcloud.lan.ddnsgeek.com
|
- OVERWRITECLIURL=${NEXTCLOUD_OVERWRITECLIURL}
|
||||||
|
- SMTP_HOST=${NEXTCLOUD_SMTP_HOST}
|
||||||
- SMTP_HOST=smtp.gmail.com
|
- SMTP_SECURE=${NEXTCLOUD_SMTP_SECURE}
|
||||||
- SMTP_SECURE=tls
|
- SMTP_PORT=${NEXTCLOUD_SMTP_PORT}
|
||||||
- SMTP_PORT=587
|
- SMTP_AUTHTYPE=${NEXTCLOUD_SMTP_AUTHTYPE}
|
||||||
- SMTP_AUTHTYPE=login
|
- MAIL_FROM_ADDRESS=${NEXTCLOUD_SMTP_FROM_ADDRESS}
|
||||||
- MAIL_FROM_ADDRESS=beatz174
|
- MAIL_DOMAIN=${NEXTCLOUD_SMTP_DOMAIN}
|
||||||
- MAIL_DOMAIN=gmail.com
|
- SMTP_NAME=${NEXTCLOUD_SMTP_NAME}
|
||||||
- SMTP_NAME=beatz174@gmail.com
|
- SMTP_PASSWORD_FILE=/run/secrets/nextcloud_smtp_password
|
||||||
- SMTP_PASSWORD=kqdw fvml wlag ldgv
|
- REDIS_HOST=${NEXTCLOUD_REDIS_HOST}
|
||||||
|
- REDIS_HOST_PORT=${NEXTCLOUD_REDIS_HOST_PORT}
|
||||||
- REDIS_HOST=redis
|
- REDIS_HOST_PASSWORD_FILE=/run/secrets/nextcloud_redis_password
|
||||||
- REDIS_HOST_PORT=6379
|
secrets:
|
||||||
- REDIS_HOST_PASSWORD=TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n
|
- nextcloud_db_password
|
||||||
|
- nextcloud_smtp_password
|
||||||
|
- nextcloud_redis_password
|
||||||
networks:
|
networks:
|
||||||
- traefik
|
- traefik
|
||||||
- nextcloud
|
- nextcloud
|
||||||
labels:
|
labels:
|
||||||
- "traefik.http.routers.nextcloud.rule=Host(`nextcloud.lan.ddnsgeek.com`)"
|
- "traefik.http.routers.nextcloud.rule=Host(`${NEXTCLOUD_TRUSTED_DOMAINS}`)"
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.http.routers.nextcloud.entrypoints=websecure"
|
- "traefik.http.routers.nextcloud.entrypoints=websecure"
|
||||||
- "traefik.http.routers.nextcloud.tls.certresolver=myresolver"
|
- "traefik.http.routers.nextcloud.tls.certresolver=myresolver"
|
||||||
@@ -54,7 +57,6 @@ services:
|
|||||||
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.regex=https://(.*)/.well-known/webfinger"
|
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.regex=https://(.*)/.well-known/webfinger"
|
||||||
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.replacement=https://$${1}/nextcloud/index.php/.well-known/webfinger"
|
- "traefik.http.middlewares.nextcloud-webfinger.redirectregex.replacement=https://$${1}/nextcloud/index.php/.well-known/webfinger"
|
||||||
- "traefik.docker.network=core_traefik"
|
- "traefik.docker.network=core_traefik"
|
||||||
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test:
|
test:
|
||||||
- CMD-SHELL
|
- CMD-SHELL
|
||||||
@@ -68,9 +70,6 @@ services:
|
|||||||
retries: 6
|
retries: 6
|
||||||
start_period: 180s
|
start_period: 180s
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
nextcloud-db:
|
nextcloud-db:
|
||||||
image: mariadb:11.4
|
image: mariadb:11.4
|
||||||
restart: always
|
restart: always
|
||||||
@@ -78,36 +77,41 @@ services:
|
|||||||
container_name: nextcloud-db
|
container_name: nextcloud-db
|
||||||
hostname: nextcloud_db
|
hostname: nextcloud_db
|
||||||
command: --transaction-isolation=READ-COMMITTED --log-bin=binlog --binlog-format=ROW
|
command: --transaction-isolation=READ-COMMITTED --log-bin=binlog --binlog-format=ROW
|
||||||
|
# env_file:
|
||||||
|
# - ${PROJECT_ROOT}/secrets/stack-secrets.env
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/nextcloud/database:/var/lib/mysql:rw
|
- ${PROJECT_ROOT}/apps/nextcloud/database:/var/lib/mysql:rw
|
||||||
environment:
|
environment:
|
||||||
- MYSQL_ROOT_PASSWORD=R1m@dmin
|
- MYSQL_ROOT_PASSWORD_FILE=/run/secrets/nextcloud_db_root_password
|
||||||
- MYSQL_PASSWORD=R1m@dmin
|
- MYSQL_PASSWORD_FILE=/run/secrets/nextcloud_db_password
|
||||||
- MYSQL_DATABASE=nextcloud
|
- MYSQL_DATABASE=${NEXTCLOUD_MYSQL_DATABASE}
|
||||||
- MYSQL_USER=nextcloud
|
- MYSQL_USER=${NEXTCLOUD_DB_USER}
|
||||||
- MARIADB_AUTO_UPGRADE=1
|
- MARIADB_AUTO_UPGRADE=${NEXTCLOUD_MARIADB_AUTO_UPGRADE}
|
||||||
- NEXTCLOUD_ADMIN_USER=admin
|
- NEXTCLOUD_ADMIN_USER=${NEXTCLOUD_ADMIN_USER}
|
||||||
- NEXTCLOUD_ADMIN_PASSWORD=R1m@dmin
|
- NEXTCLOUD_ADMIN_PASSWORD_FILE=/run/secrets/nextcloud_admin_password
|
||||||
|
secrets:
|
||||||
|
- nextcloud_db_root_password
|
||||||
|
- nextcloud_db_password
|
||||||
|
- nextcloud_admin_password
|
||||||
networks:
|
networks:
|
||||||
- nextcloud
|
- nextcloud
|
||||||
labels:
|
labels:
|
||||||
- "io.portainer.accesscontrol.public"
|
- "io.portainer.accesscontrol.public"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "mariadb-admin ping -u nextcloud --password=R1m@dmin --silent"]
|
test: ["CMD-SHELL", "mariadb-admin ping -u $$MYSQL_USER --password=$$(cat /run/secrets/nextcloud_db_password) --silent"]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 12
|
retries: 12
|
||||||
start_period: 60s
|
start_period: 60s
|
||||||
|
|
||||||
|
|
||||||
nextcloud-redis:
|
nextcloud-redis:
|
||||||
image: "redis"
|
image: "redis"
|
||||||
profiles: ["apps","all","nextcloud"]
|
profiles: ["apps","all","nextcloud"]
|
||||||
command: ["redis-server", "--requirepass", "TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n", "--appendonly", "yes", "--save", "60", "1000"]
|
command: ["sh", "-c", "redis-server --requirepass \"$$(cat /run/secrets/nextcloud_redis_password)\" --appendonly yes --save 60 1000"]
|
||||||
hostname: redis
|
hostname: redis
|
||||||
container_name: nextcloud-redis
|
container_name: nextcloud-redis
|
||||||
environment:
|
secrets:
|
||||||
- REDIS_HOST_PASSWORD=TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n
|
- nextcloud_redis_password
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/nextcloud/data/redis:/data:rw
|
- ${PROJECT_ROOT}/apps/nextcloud/data/redis:/data:rw
|
||||||
restart: always
|
restart: always
|
||||||
@@ -116,15 +120,23 @@ services:
|
|||||||
labels:
|
labels:
|
||||||
- "io.portainer.accesscontrol.public"
|
- "io.portainer.accesscontrol.public"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "redis-cli -a TzBF8wcJNmVd9p2CTmBejPS9dpye6kWQeH3DmrQS9TPfTRriSHFN5VqH4CgzcuVZYWH2GBb7QU5GuEpNDGYdKjM6hjmLyjSgCFMiPms3Hv9n PING | grep -q PONG"]
|
test: ["CMD-SHELL", "redis-cli -a \"$$(cat /run/secrets/nextcloud_redis_password)\" PING | grep -q PONG"]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 6
|
retries: 6
|
||||||
start_period: 10s
|
start_period: 10s
|
||||||
|
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
# traefik_reverse_proxy:
|
|
||||||
# external: true
|
|
||||||
nextcloud:
|
nextcloud:
|
||||||
# driver: bridge
|
|
||||||
|
secrets:
|
||||||
|
nextcloud_db_root_password:
|
||||||
|
file: ${PROJECT_ROOT}/secrets/nextcloud_db_root_password.txt
|
||||||
|
nextcloud_db_password:
|
||||||
|
file: ${PROJECT_ROOT}/secrets/nextcloud_db_password.txt
|
||||||
|
nextcloud_admin_password:
|
||||||
|
file: ${PROJECT_ROOT}/secrets/nextcloud_admin_password.txt
|
||||||
|
nextcloud_smtp_password:
|
||||||
|
file: ${PROJECT_ROOT}/secrets/nextcloud_smtp_password.txt
|
||||||
|
nextcloud_redis_password:
|
||||||
|
file: ${PROJECT_ROOT}/secrets/nextcloud_redis_password.txt
|
||||||
|
|||||||
@@ -4,17 +4,21 @@ services:
|
|||||||
container_name: passbolt-db
|
container_name: passbolt-db
|
||||||
image: mariadb:12
|
image: mariadb:12
|
||||||
restart: always
|
restart: always
|
||||||
|
# env_file:
|
||||||
|
# - ${PROJECT_ROOT}/secrets/stack-secrets.env
|
||||||
environment:
|
environment:
|
||||||
MYSQL_RANDOM_ROOT_PASSWORD: "true"
|
MYSQL_RANDOM_ROOT_PASSWORD: ${PASSBOLT_MYSQL_RANDOM_ROOT_PASSWORD}
|
||||||
MYSQL_DATABASE: "passbolt"
|
MYSQL_DATABASE: ${PASSBOLT_DB_NAME}
|
||||||
MYSQL_USER: "passbolt"
|
MYSQL_USER: ${PASSBOLT_DB_USER}
|
||||||
MYSQL_PASSWORD: "P4ssb0lt"
|
MYSQL_PASSWORD_FILE: /run/secrets/passbolt_db_password
|
||||||
|
secrets:
|
||||||
|
- passbolt_db_password
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/passbolt/data/database:/var/lib/mysql
|
- ${PROJECT_ROOT}/apps/passbolt/data/database:/var/lib/mysql
|
||||||
networks:
|
networks:
|
||||||
- passbolt
|
- passbolt
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "mariadb-admin ping -h 127.0.0.1 -u\"$$MARIADB_USER\" -p\"$$MARIADB_PASSWORD\" --silent"]
|
test: ["CMD-SHELL", "mariadb-admin ping -h 127.0.0.1 -u\"$$MYSQL_USER\" -p\"$$(cat /run/secrets/passbolt_db_password)\" --silent"]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 12
|
retries: 12
|
||||||
@@ -22,22 +26,24 @@ services:
|
|||||||
labels:
|
labels:
|
||||||
- "io.portainer.accesscontrol.public"
|
- "io.portainer.accesscontrol.public"
|
||||||
|
|
||||||
|
|
||||||
passbolt-webapp:
|
passbolt-webapp:
|
||||||
image: passbolt/passbolt:latest-ce
|
image: passbolt/passbolt:latest-ce
|
||||||
profiles: ["apps","all","passbolt"]
|
profiles: ["apps","all","passbolt"]
|
||||||
container_name: passbolt-webapp
|
container_name: passbolt-webapp
|
||||||
#Alternatively you can use rootless:
|
|
||||||
restart: always
|
restart: always
|
||||||
depends_on:
|
depends_on:
|
||||||
- passbolt-db
|
- passbolt-db
|
||||||
|
# env_file:
|
||||||
|
# - ${PROJECT_ROOT}/secrets/stack-secrets.env
|
||||||
environment:
|
environment:
|
||||||
APP_FULL_BASE_URL: https://passbolt.lan.ddnsgeek.com
|
APP_FULL_BASE_URL: ${PASSBOLT_APP_FULL_BASE_URL}
|
||||||
DATASOURCES_DEFAULT_HOST: "passbolt-db"
|
DATASOURCES_DEFAULT_HOST: ${PASSBOLT_DATASOURCES_DEFAULT_HOST}
|
||||||
DATASOURCES_DEFAULT_USERNAME: "passbolt"
|
DATASOURCES_DEFAULT_USERNAME: ${PASSBOLT_DB_USER}
|
||||||
DATASOURCES_DEFAULT_PASSWORD: "P4ssb0lt"
|
DATASOURCES_DEFAULT_PASSWORD_FILE: /run/secrets/passbolt_db_password
|
||||||
DATASOURCES_DEFAULT_DATABASE: "passbolt"
|
DATASOURCES_DEFAULT_DATABASE: ${PASSBOLT_DB_NAME}
|
||||||
PASSBOLT_GPG_SERVER_KEY_FINGERPRINT: "CBBB2B8F3E9FACA114537ACB8965B750F7363586"
|
PASSBOLT_GPG_SERVER_KEY_FINGERPRINT: ${PASSBOLT_GPG_SERVER_KEY_FINGERPRINT}
|
||||||
|
secrets:
|
||||||
|
- passbolt_db_password
|
||||||
volumes:
|
volumes:
|
||||||
- ${PROJECT_ROOT}/apps/passbolt/data/gpg:/etc/passbolt/gpg
|
- ${PROJECT_ROOT}/apps/passbolt/data/gpg:/etc/passbolt/gpg
|
||||||
- ${PROJECT_ROOT}/apps/passbolt/data/jwt:/etc/passbolt/jwt
|
- ${PROJECT_ROOT}/apps/passbolt/data/jwt:/etc/passbolt/jwt
|
||||||
@@ -60,20 +66,16 @@ services:
|
|||||||
- "traefik.http.routers.passbolt.tls.certresolver=myresolver"
|
- "traefik.http.routers.passbolt.tls.certresolver=myresolver"
|
||||||
- "io.portainer.accesscontrol.public"
|
- "io.portainer.accesscontrol.public"
|
||||||
- "traefik.docker.network=core_traefik"
|
- "traefik.docker.network=core_traefik"
|
||||||
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "curl -fsS http://localhost/healthcheck/status | grep -qx OK"]
|
test: ["CMD-SHELL", "curl -fsS http://localhost/healthcheck/status | grep -qx OK"]
|
||||||
# su -s /bin/sh -c "/usr/share/php/passbolt/bin/cake passbolt healthcheck" www-data
|
|
||||||
# | grep -q "No error found"
|
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 6
|
retries: 6
|
||||||
start_period: 120s
|
start_period: 120s
|
||||||
|
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
# traefik_reverse_proxy:
|
|
||||||
# external: true
|
|
||||||
# internal:
|
|
||||||
# driver: bridge
|
|
||||||
passbolt:
|
passbolt:
|
||||||
|
|
||||||
|
secrets:
|
||||||
|
passbolt_db_password:
|
||||||
|
file: ${PROJECT_ROOT}/secrets/passbolt_db_password.txt
|
||||||
|
|||||||
@@ -1,178 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
help() {
|
|
||||||
cat <<EOF
|
|
||||||
Command line:
|
|
||||||
-h Display this help
|
|
||||||
-d Dry run to update the configuration files.
|
|
||||||
-f Always update on the configuration files (existing files are renamed with
|
|
||||||
the .old suffix). Without this option, the new configuration files are
|
|
||||||
copied with the .new suffix
|
|
||||||
Environment variables:
|
|
||||||
INSTANCE_NAME settings.yml : general.instance_name
|
|
||||||
AUTOCOMPLETE settings.yml : search.autocomplete
|
|
||||||
BASE_URL settings.yml : server.base_url
|
|
||||||
MORTY_URL settings.yml : result_proxy.url
|
|
||||||
MORTY_KEY settings.yml : result_proxy.key
|
|
||||||
BIND_ADDRESS uwsgi bind to the specified TCP socket using HTTP protocol.
|
|
||||||
Default value: ${DEFAULT_BIND_ADDRESS}
|
|
||||||
Volume:
|
|
||||||
/etc/searxng the docker entry point copies settings.yml and uwsgi.ini in
|
|
||||||
this directory (see the -f command line option)"
|
|
||||||
|
|
||||||
EOF
|
|
||||||
}
|
|
||||||
|
|
||||||
export DEFAULT_BIND_ADDRESS="0.0.0.0:8080"
|
|
||||||
export BIND_ADDRESS="${BIND_ADDRESS:-${DEFAULT_BIND_ADDRESS}}"
|
|
||||||
|
|
||||||
# Parse command line
|
|
||||||
FORCE_CONF_UPDATE=0
|
|
||||||
DRY_RUN=0
|
|
||||||
|
|
||||||
while getopts "fdh" option
|
|
||||||
do
|
|
||||||
case $option in
|
|
||||||
|
|
||||||
f) FORCE_CONF_UPDATE=1 ;;
|
|
||||||
d) DRY_RUN=1 ;;
|
|
||||||
|
|
||||||
h)
|
|
||||||
help
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "unknow option ${option}"
|
|
||||||
exit 42
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
get_searxng_version(){
|
|
||||||
su searxng -c \
|
|
||||||
'python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)"' \
|
|
||||||
2>/dev/null
|
|
||||||
}
|
|
||||||
|
|
||||||
SEARXNG_VERSION="$(get_searxng_version)"
|
|
||||||
export SEARXNG_VERSION
|
|
||||||
echo "SearXNG version ${SEARXNG_VERSION}"
|
|
||||||
|
|
||||||
# helpers to update the configuration files
|
|
||||||
patch_uwsgi_settings() {
|
|
||||||
CONF="$1"
|
|
||||||
|
|
||||||
# update uwsg.ini
|
|
||||||
sed -i \
|
|
||||||
-e "s|workers = .*|workers = ${UWSGI_WORKERS:-%k}|g" \
|
|
||||||
-e "s|threads = .*|threads = ${UWSGI_THREADS:-4}|g" \
|
|
||||||
"${CONF}"
|
|
||||||
}
|
|
||||||
|
|
||||||
patch_searxng_settings() {
|
|
||||||
CONF="$1"
|
|
||||||
|
|
||||||
# Make sure that there is trailing slash at the end of BASE_URL
|
|
||||||
# see https://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion
|
|
||||||
export BASE_URL="${BASE_URL%/}/"
|
|
||||||
|
|
||||||
# update settings.yml
|
|
||||||
sed -i \
|
|
||||||
-e "s|base_url: false|base_url: ${BASE_URL}|g" \
|
|
||||||
-e "s/instance_name: \"SearXNG\"/instance_name: \"${INSTANCE_NAME}\"/g" \
|
|
||||||
-e "s/autocomplete: \"\"/autocomplete: \"${AUTOCOMPLETE}\"/g" \
|
|
||||||
-e "s/ultrasecretkey/$(openssl rand -hex 32)/g" \
|
|
||||||
"${CONF}"
|
|
||||||
|
|
||||||
# Morty configuration
|
|
||||||
|
|
||||||
if [ -n "${MORTY_KEY}" ] && [ -n "${MORTY_URL}" ]; then
|
|
||||||
sed -i -e "s/image_proxy: false/image_proxy: true/g" \
|
|
||||||
"${CONF}"
|
|
||||||
cat >> "${CONF}" <<-EOF
|
|
||||||
|
|
||||||
# Morty configuration
|
|
||||||
result_proxy:
|
|
||||||
url: ${MORTY_URL}
|
|
||||||
key: !!binary "${MORTY_KEY}"
|
|
||||||
EOF
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
update_conf() {
|
|
||||||
FORCE_CONF_UPDATE=$1
|
|
||||||
CONF="$2"
|
|
||||||
NEW_CONF="${2}.new"
|
|
||||||
OLD_CONF="${2}.old"
|
|
||||||
REF_CONF="$3"
|
|
||||||
PATCH_REF_CONF="$4"
|
|
||||||
|
|
||||||
if [ -f "${CONF}" ]; then
|
|
||||||
if [ "${REF_CONF}" -nt "${CONF}" ]; then
|
|
||||||
# There is a new version
|
|
||||||
if [ "$FORCE_CONF_UPDATE" -ne 0 ]; then
|
|
||||||
# Replace the current configuration
|
|
||||||
printf '⚠️ Automatically update %s to the new version\n' "${CONF}"
|
|
||||||
if [ ! -f "${OLD_CONF}" ]; then
|
|
||||||
printf 'The previous configuration is saved to %s\n' "${OLD_CONF}"
|
|
||||||
mv "${CONF}" "${OLD_CONF}"
|
|
||||||
fi
|
|
||||||
cp "${REF_CONF}" "${CONF}"
|
|
||||||
$PATCH_REF_CONF "${CONF}"
|
|
||||||
else
|
|
||||||
# Keep the current configuration
|
|
||||||
printf '⚠️ Check new version %s to make sure SearXNG is working properly\n' "${NEW_CONF}"
|
|
||||||
cp "${REF_CONF}" "${NEW_CONF}"
|
|
||||||
$PATCH_REF_CONF "${NEW_CONF}"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
printf 'Use existing %s\n' "${CONF}"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
printf 'Create %s\n' "${CONF}"
|
|
||||||
cp "${REF_CONF}" "${CONF}"
|
|
||||||
$PATCH_REF_CONF "${CONF}"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# searx compatibility: copy /etc/searx/* to /etc/searxng/*
|
|
||||||
SEARX_CONF=0
|
|
||||||
if [ -f "/etc/searx/settings.yml" ]; then
|
|
||||||
if [ ! -f "${SEARXNG_SETTINGS_PATH}" ]; then
|
|
||||||
printf '⚠️ /etc/searx/settings.yml is copied to /etc/searxng\n'
|
|
||||||
cp "/etc/searx/settings.yml" "${SEARXNG_SETTINGS_PATH}"
|
|
||||||
fi
|
|
||||||
SEARX_CONF=1
|
|
||||||
fi
|
|
||||||
if [ -f "/etc/searx/uwsgi.ini" ]; then
|
|
||||||
printf '⚠️ /etc/searx/uwsgi.ini is ignored. Use the volume /etc/searxng\n'
|
|
||||||
SEARX_CONF=1
|
|
||||||
fi
|
|
||||||
if [ "$SEARX_CONF" -eq "1" ]; then
|
|
||||||
printf '⚠️ The deprecated volume /etc/searx is mounted. Please update your configuration to use /etc/searxng ⚠️\n'
|
|
||||||
cat << EOF > /etc/searx/deprecated_volume_read_me.txt
|
|
||||||
This Docker image uses the volume /etc/searxng
|
|
||||||
Update your configuration:
|
|
||||||
* remove uwsgi.ini (or very carefully update your existing uwsgi.ini using https://github.com/searxng/searxng/blob/master/dockerfiles/uwsgi.ini )
|
|
||||||
* mount /etc/searxng instead of /etc/searx
|
|
||||||
EOF
|
|
||||||
fi
|
|
||||||
# end of searx compatibility
|
|
||||||
|
|
||||||
# make sure there are uwsgi settings
|
|
||||||
update_conf "${FORCE_CONF_UPDATE}" "${UWSGI_SETTINGS_PATH}" "/usr/local/searxng/dockerfiles/uwsgi.ini" "patch_uwsgi_settings"
|
|
||||||
|
|
||||||
# make sure there are searxng settings
|
|
||||||
update_conf "${FORCE_CONF_UPDATE}" "${SEARXNG_SETTINGS_PATH}" "/usr/local/searxng/searx/settings.yml" "patch_searxng_settings"
|
|
||||||
|
|
||||||
# dry run (to update configuration files, then inspect them)
|
|
||||||
if [ $DRY_RUN -eq 1 ]; then
|
|
||||||
printf 'Dry run\n'
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
|
|
||||||
unset MORTY_KEY
|
|
||||||
|
|
||||||
# Start uwsgi
|
|
||||||
printf 'Listen on %s\n' "${BIND_ADDRESS}"
|
|
||||||
exec su-exec searxng:searxng uwsgi --master --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"
|
|
||||||
@@ -1,205 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
|
|
||||||
import sys, os
|
|
||||||
from pallets_sphinx_themes import ProjectLink
|
|
||||||
|
|
||||||
from searx import get_setting
|
|
||||||
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
|
|
||||||
|
|
||||||
# Project --------------------------------------------------------------
|
|
||||||
|
|
||||||
project = 'SearXNG'
|
|
||||||
copyright = 'SearXNG team'
|
|
||||||
author = 'SearXNG team'
|
|
||||||
release, version = VERSION_STRING, VERSION_STRING
|
|
||||||
|
|
||||||
SEARXNG_URL = get_setting('server.base_url') or 'https://example.org/searxng'
|
|
||||||
ISSUE_URL = get_setting('brand.issue_url')
|
|
||||||
DOCS_URL = get_setting('brand.docs_url')
|
|
||||||
PUBLIC_INSTANCES = get_setting('brand.public_instances')
|
|
||||||
PRIVACYPOLICY_URL = get_setting('general.privacypolicy_url')
|
|
||||||
CONTACT_URL = get_setting('general.contact_url')
|
|
||||||
WIKI_URL = get_setting('brand.wiki_url')
|
|
||||||
|
|
||||||
# hint: sphinx.ext.viewcode won't highlight when 'highlight_language' [1] is set
|
|
||||||
# to string 'none' [2]
|
|
||||||
#
|
|
||||||
# [1] https://www.sphinx-doc.org/en/master/usage/extensions/viewcode.html
|
|
||||||
# [2] https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-highlight_language
|
|
||||||
|
|
||||||
highlight_language = 'default'
|
|
||||||
|
|
||||||
# General --------------------------------------------------------------
|
|
||||||
|
|
||||||
master_doc = "index"
|
|
||||||
source_suffix = '.rst'
|
|
||||||
numfig = True
|
|
||||||
|
|
||||||
exclude_patterns = ['build-templates/*.rst', 'user/*.md']
|
|
||||||
|
|
||||||
import searx.engines
|
|
||||||
import searx.plugins
|
|
||||||
import searx.webutils
|
|
||||||
|
|
||||||
# import searx.webapp is needed to init the engines & plugins, to init a
|
|
||||||
# (empty) secret_key is needed.
|
|
||||||
searx.settings['server']['secret_key'] = ''
|
|
||||||
import searx.webapp
|
|
||||||
|
|
||||||
searx.engines.load_engines(searx.settings['engines'])
|
|
||||||
|
|
||||||
jinja_contexts = {
|
|
||||||
'searx': {
|
|
||||||
'engines': searx.engines.engines,
|
|
||||||
'plugins': searx.plugins.plugins,
|
|
||||||
'version': {
|
|
||||||
'node': os.getenv('NODE_MINIMUM_VERSION')
|
|
||||||
},
|
|
||||||
'enabled_engine_count': sum(not x.disabled for x in searx.engines.engines.values()),
|
|
||||||
'categories': searx.engines.categories,
|
|
||||||
'categories_as_tabs': {c: searx.engines.categories[c] for c in searx.settings['categories_as_tabs']},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
jinja_filters = {
|
|
||||||
'group_engines_in_tab': searx.webutils.group_engines_in_tab,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Let the Jinja template in configured_engines.rst access documented_modules
|
|
||||||
# to automatically link documentation for modules if it exists.
|
|
||||||
def setup(app):
    """Sphinx extension entry point (called by Sphinx at build start).

    Wires two event handlers so the Jinja template in
    ``user/configured_engines`` can link to the per-module documentation
    that Sphinx discovers while reading all the other documents.
    """
    # docname of the page rendered from the engines Jinja template
    ENGINES_DOCNAME = 'user/configured_engines'

    def before_read_docs(app, env, docnames):
        # Move the engines page to the end of the read order.
        assert ENGINES_DOCNAME in docnames
        docnames.remove(ENGINES_DOCNAME)
        docnames.append(ENGINES_DOCNAME)
        # configured_engines must come last so that sphinx already has
        # discovered the python module documentations

    def source_read(app, docname, source):
        # Just before the engines page is read, expose the modules known to
        # the 'py' domain to the Jinja context used by that page.
        if docname == ENGINES_DOCNAME:
            jinja_contexts['searx']['documented_modules'] = app.env.domains['py'].modules

    app.connect('env-before-read-docs', before_read_docs)
    app.connect('source-read', source_read)
|
|
||||||
|
|
||||||
# usage:: lorem :patch:`f373169` ipsum
|
|
||||||
extlinks = {}
|
|
||||||
|
|
||||||
# upstream links
|
|
||||||
extlinks['wiki'] = ('https://github.com/searxng/searxng/wiki/%s', ' %s')
|
|
||||||
extlinks['pull'] = ('https://github.com/searxng/searxng/pull/%s', 'PR %s')
|
|
||||||
extlinks['pull-searx'] = ('https://github.com/searx/searx/pull/%s', 'PR %s')
|
|
||||||
|
|
||||||
# links to custom brand
|
|
||||||
extlinks['origin'] = (GIT_URL + '/blob/' + GIT_BRANCH + '/%s', 'git://%s')
|
|
||||||
extlinks['patch'] = (GIT_URL + '/commit/%s', '#%s')
|
|
||||||
extlinks['docs'] = (DOCS_URL + '/%s', 'docs: %s')
|
|
||||||
extlinks['pypi'] = ('https://pypi.org/project/%s', 'PyPi: %s')
|
|
||||||
extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '%s')
|
|
||||||
#extlinks['role'] = (
|
|
||||||
# 'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '')
|
|
||||||
extlinks['duref'] = (
|
|
||||||
'https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#%s', '%s')
|
|
||||||
extlinks['durole'] = (
|
|
||||||
'https://docutils.sourceforge.io/docs/ref/rst/roles.html#%s', '%s')
|
|
||||||
extlinks['dudir'] = (
|
|
||||||
'https://docutils.sourceforge.io/docs/ref/rst/directives.html#%s', '%s')
|
|
||||||
extlinks['ctan'] = (
|
|
||||||
'https://ctan.org/pkg/%s', 'CTAN: %s')
|
|
||||||
|
|
||||||
extensions = [
|
|
||||||
'sphinx.ext.imgmath',
|
|
||||||
'sphinx.ext.extlinks',
|
|
||||||
'sphinx.ext.viewcode',
|
|
||||||
"sphinx.ext.autodoc",
|
|
||||||
"sphinx.ext.intersphinx",
|
|
||||||
"pallets_sphinx_themes",
|
|
||||||
"sphinx_issues", # https://github.com/sloria/sphinx-issues/blob/master/README.rst
|
|
||||||
"sphinx_jinja", # https://github.com/tardyp/sphinx-jinja
|
|
||||||
"sphinxcontrib.programoutput", # https://github.com/NextThought/sphinxcontrib-programoutput
|
|
||||||
'linuxdoc.kernel_include', # Implementation of the 'kernel-include' reST-directive.
|
|
||||||
'linuxdoc.rstFlatTable', # Implementation of the 'flat-table' reST-directive.
|
|
||||||
'linuxdoc.kfigure', # Sphinx extension which implements scalable image handling.
|
|
||||||
"sphinx_tabs.tabs", # https://github.com/djungelorm/sphinx-tabs
|
|
||||||
'myst_parser', # https://www.sphinx-doc.org/en/master/usage/markdown.html
|
|
||||||
'notfound.extension', # https://github.com/readthedocs/sphinx-notfound-page
|
|
||||||
]
|
|
||||||
|
|
||||||
autodoc_default_options = {
|
|
||||||
'member-order': 'groupwise',
|
|
||||||
}
|
|
||||||
|
|
||||||
myst_enable_extensions = [
|
|
||||||
"replacements", "smartquotes"
|
|
||||||
]
|
|
||||||
|
|
||||||
suppress_warnings = ['myst.domains']
|
|
||||||
|
|
||||||
intersphinx_mapping = {
|
|
||||||
"python": ("https://docs.python.org/3/", None),
|
|
||||||
"babel" : ("https://babel.readthedocs.io/en/latest/", None),
|
|
||||||
"flask": ("https://flask.palletsprojects.com/", None),
|
|
||||||
"flask_babel": ("https://python-babel.github.io/flask-babel/", None),
|
|
||||||
# "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
|
|
||||||
"jinja": ("https://jinja.palletsprojects.com/", None),
|
|
||||||
"linuxdoc" : ("https://return42.github.io/linuxdoc/", None),
|
|
||||||
"sphinx" : ("https://www.sphinx-doc.org/en/master/", None),
|
|
||||||
"redis": ('https://redis.readthedocs.io/en/stable/', None),
|
|
||||||
}
|
|
||||||
|
|
||||||
issues_github_path = "searxng/searxng"
|
|
||||||
|
|
||||||
# HTML -----------------------------------------------------------------
|
|
||||||
|
|
||||||
# https://searxng.github.io/searxng --> '/searxng/'
|
|
||||||
# https://docs.searxng.org --> '/'
|
|
||||||
notfound_urls_prefix = '/'
|
|
||||||
|
|
||||||
sys.path.append(os.path.abspath('_themes'))
|
|
||||||
sys.path.insert(0, os.path.abspath("../utils/"))
|
|
||||||
html_theme_path = ['_themes']
|
|
||||||
html_theme = "searxng"
|
|
||||||
|
|
||||||
# sphinx.ext.imgmath setup
|
|
||||||
html_math_renderer = 'imgmath'
|
|
||||||
imgmath_image_format = 'svg'
|
|
||||||
imgmath_font_size = 14
|
|
||||||
# sphinx.ext.imgmath setup END
|
|
||||||
|
|
||||||
html_show_sphinx = False
|
|
||||||
html_theme_options = {"index_sidebar_logo": True}
|
|
||||||
html_context = {"project_links": [] }
|
|
||||||
html_context["project_links"].append(ProjectLink("Source", GIT_URL + '/tree/' + GIT_BRANCH))
|
|
||||||
|
|
||||||
if WIKI_URL:
|
|
||||||
html_context["project_links"].append(ProjectLink("Wiki", WIKI_URL))
|
|
||||||
if PUBLIC_INSTANCES:
|
|
||||||
html_context["project_links"].append(ProjectLink("Public instances", PUBLIC_INSTANCES))
|
|
||||||
if ISSUE_URL:
|
|
||||||
html_context["project_links"].append(ProjectLink("Issue Tracker", ISSUE_URL))
|
|
||||||
if PRIVACYPOLICY_URL:
|
|
||||||
html_context["project_links"].append(ProjectLink("Privacy Policy", PRIVACYPOLICY_URL))
|
|
||||||
if CONTACT_URL:
|
|
||||||
html_context["project_links"].append(ProjectLink("Contact", CONTACT_URL))
|
|
||||||
|
|
||||||
html_sidebars = {
|
|
||||||
"**": [
|
|
||||||
"globaltoc.html",
|
|
||||||
"project.html",
|
|
||||||
"relations.html",
|
|
||||||
"searchbox.html",
|
|
||||||
"sourcelink.html"
|
|
||||||
],
|
|
||||||
}
|
|
||||||
singlehtml_sidebars = {"index": ["project.html", "localtoc.html"]}
|
|
||||||
html_logo = "../src/brand/searxng-wordmark.svg"
|
|
||||||
html_title = "SearXNG Documentation ({})".format(VERSION_STRING)
|
|
||||||
html_show_sourcelink = True
|
|
||||||
|
|
||||||
# LaTeX ----------------------------------------------------------------
|
|
||||||
|
|
||||||
latex_documents = [
|
|
||||||
(master_doc, "searxng-{}.tex".format(VERSION_STRING), html_title, author, "manual")
|
|
||||||
]
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
*.md
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
|
|
||||||
categories = ['general'] # optional
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    '''pre-request callback

    Fill in the request parameters for *query* and hand the (mutated)
    ``params`` dict back to the engine processor.

    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {} # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1 # number of the requested page
    '''
    params['url'] = 'https://host/%s' % query
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    '''post-response callback

    resp: requests response object
    '''
    # template result entry: engines fill url/title/content from resp
    entry = dict(url='', title='', content='')
    return [entry]
|
|
||||||
@@ -1,106 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
# pylint: disable=missing-module-docstring
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
from os.path import dirname, abspath
|
|
||||||
|
|
||||||
import logging
|
|
||||||
|
|
||||||
import searx.unixthreadname
|
|
||||||
import searx.settings_loader
|
|
||||||
from searx.settings_defaults import settings_set_defaults
|
|
||||||
|
|
||||||
|
|
||||||
# Debug
|
|
||||||
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
|
|
||||||
|
|
||||||
# Production
|
|
||||||
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
|
|
||||||
LOG_LEVEL_PROD = logging.WARNING
|
|
||||||
|
|
||||||
searx_dir = abspath(dirname(__file__))
|
|
||||||
searx_parent_dir = abspath(dirname(dirname(__file__)))
|
|
||||||
settings, settings_load_message = searx.settings_loader.load_settings()
|
|
||||||
|
|
||||||
if settings is not None:
|
|
||||||
settings = settings_set_defaults(settings)
|
|
||||||
|
|
||||||
_unset = object()
|
|
||||||
|
|
||||||
|
|
||||||
def get_setting(name, default=_unset):
    """Return the value the dotted *name* points to in the settings.

    ``name`` is a ``'.'``-separated path (e.g. ``'server.base_url'``).  If
    the path does not exist and no ``default`` was given, a
    :py:obj:`KeyError` is raised; otherwise ``default`` is returned.
    """
    node = settings
    for part in name.split('.'):
        # descend one level; anything that is not a dict ends the walk
        node = node.get(part, _unset) if isinstance(node, dict) else _unset
        if node is _unset:
            if default is _unset:
                raise KeyError(name)
            return default
    return node
|
|
||||||
|
|
||||||
|
|
||||||
def is_color_terminal():
    """Return True when stdout looks like a color-capable terminal."""
    term = os.getenv('TERM')
    # dumb/unknown terminals never get colors, regardless of isatty()
    if term in ('dumb', 'unknown'):
        return False
    return sys.stdout.isatty()
|
|
||||||
|
|
||||||
|
|
||||||
def logging_config_debug():
    """Configure logging for debug mode.

    Uses ``coloredlogs`` (when installed and stdout is a color terminal)
    with custom level/field styles, otherwise a plain
    :py:func:`logging.basicConfig`.  The log level is taken from the
    ``SEARXNG_DEBUG_LOG_LEVEL`` environment variable (default ``DEBUG``).
    """
    try:
        # optional dependency; fall back to stdlib logging when missing
        import coloredlogs  # pylint: disable=import-outside-toplevel
    except ImportError:
        coloredlogs = None

    log_level = os.environ.get('SEARXNG_DEBUG_LOG_LEVEL', 'DEBUG')
    if coloredlogs and is_color_terminal():
        # styles for the log-record *level* names
        level_styles = {
            'spam': {'color': 'green', 'faint': True},
            'debug': {},
            'notice': {'color': 'magenta'},
            'success': {'bold': True, 'color': 'green'},
            'info': {'bold': True, 'color': 'cyan'},
            'warning': {'color': 'yellow'},
            'error': {'color': 'red'},
            'critical': {'bold': True, 'color': 'red'},
        }
        # styles for the other log-record fields (name, asctime, ...)
        field_styles = {
            'asctime': {'color': 'green'},
            'hostname': {'color': 'magenta'},
            'levelname': {'color': 8},
            'name': {'color': 8},
            'programname': {'color': 'cyan'},
            'username': {'color': 'yellow'},
        }
        coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG)
    else:
        logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
|
|
||||||
|
|
||||||
|
|
||||||
searx_debug = settings['general']['debug']
|
|
||||||
if searx_debug:
|
|
||||||
logging_config_debug()
|
|
||||||
else:
|
|
||||||
logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD)
|
|
||||||
logging.root.setLevel(level=LOG_LEVEL_PROD)
|
|
||||||
logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD)
|
|
||||||
logger = logging.getLogger('searx')
|
|
||||||
logger.info(settings_load_message)
|
|
||||||
|
|
||||||
# log max_request_timeout
|
|
||||||
max_request_timeout = settings['outgoing']['max_request_timeout']
|
|
||||||
if max_request_timeout is None:
|
|
||||||
logger.info('max_request_timeout=%s', repr(max_request_timeout))
|
|
||||||
else:
|
|
||||||
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
from os import listdir
|
|
||||||
from os.path import realpath, dirname, join, isdir
|
|
||||||
from searx.utils import load_module
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
|
|
||||||
answerers_dir = dirname(realpath(__file__))
|
|
||||||
|
|
||||||
|
|
||||||
def load_answerers():
    """Load every answerer module found in the sub-directories of this package.

    Each sub-directory (not starting with ``_``) must contain an
    ``answerer.py`` that defines a non-empty ``keywords`` tuple; a module
    violating this contract aborts the process with exit code 2.
    """
    answerers = []
    for filename in listdir(answerers_dir):
        # only plain sub-directories hold answerers; skip files and _private dirs
        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
            continue
        module = load_module('answerer.py', join(answerers_dir, filename))
        # hard contract check: a broken answerer is a packaging error
        if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
            exit(2)
        answerers.append(module)
    return answerers
|
|
||||||
|
|
||||||
|
|
||||||
def get_answerers_by_keywords(answerers):
    """Map each trigger keyword to the ``answer`` callbacks it activates.

    :param answerers: iterable of answerer modules, each providing a
        ``keywords`` tuple and an ``answer`` callable.
    :return: ``defaultdict(list)`` mapping keyword -> list of callbacks.
    """
    by_keyword = defaultdict(list)
    for answerer in answerers:
        # BUG FIX: this loop was duplicated (nested over the same iterable),
        # which registered every answer len(keywords) times per keyword and
        # produced duplicate results.
        for keyword in answerer.keywords:
            by_keyword[keyword].append(answerer.answer)
    return by_keyword
|
|
||||||
|
|
||||||
|
|
||||||
def ask(query):
    """Run every answerer registered for the query's first keyword and
    collect the non-empty results."""
    parts = [p for p in query.query.split() if p]
    if not parts or parts[0] not in answerers_by_keywords:
        return []

    collected = []
    for callback in answerers_by_keywords[parts[0]]:
        outcome = callback(query)
        if outcome:
            collected.append(outcome)
    return collected
|
|
||||||
|
|
||||||
|
|
||||||
answerers = load_answerers()
|
|
||||||
answerers_by_keywords = get_answerers_by_keywords(answerers)
|
|
||||||
@@ -1,70 +0,0 @@
|
|||||||
import hashlib
|
|
||||||
import random
|
|
||||||
import string
|
|
||||||
import uuid
|
|
||||||
from flask_babel import gettext
|
|
||||||
|
|
||||||
# required answerer attribute
|
|
||||||
# specifies which search query keywords triggers this answerer
|
|
||||||
keywords = ('random',)
|
|
||||||
|
|
||||||
random_int_max = 2**31
|
|
||||||
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
|
|
||||||
|
|
||||||
|
|
||||||
def random_characters():
|
|
||||||
return [random.choice(random_string_letters) for _ in range(random.randint(8, 32))]
|
|
||||||
|
|
||||||
|
|
||||||
def random_string():
|
|
||||||
return ''.join(random_characters())
|
|
||||||
|
|
||||||
|
|
||||||
def random_float():
|
|
||||||
return str(random.random())
|
|
||||||
|
|
||||||
|
|
||||||
def random_int():
|
|
||||||
return str(random.randint(-random_int_max, random_int_max))
|
|
||||||
|
|
||||||
|
|
||||||
def random_sha256():
|
|
||||||
m = hashlib.sha256()
|
|
||||||
m.update(''.join(random_characters()).encode())
|
|
||||||
return str(m.hexdigest())
|
|
||||||
|
|
||||||
|
|
||||||
def random_uuid():
|
|
||||||
return str(uuid.uuid4())
|
|
||||||
|
|
||||||
|
|
||||||
random_types = {
|
|
||||||
'string': random_string,
|
|
||||||
'int': random_int,
|
|
||||||
'float': random_float,
|
|
||||||
'sha256': random_sha256,
|
|
||||||
'uuid': random_uuid,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# required answerer function
|
|
||||||
# can return a list of results (any result type) for a given query
|
|
||||||
def answer(query):
    """required answerer function

    Return a single random value for queries of the form
    ``random <type>``; any other query shape yields no result.
    """
    tokens = query.query.split()
    if len(tokens) != 2:
        return []

    generator = random_types.get(tokens[1])
    if generator is None:
        return []

    return [{'answer': generator()}]
|
|
||||||
|
|
||||||
|
|
||||||
# required answerer function
|
|
||||||
# returns information about the answerer
|
|
||||||
def self_info():
|
|
||||||
return {
|
|
||||||
'name': gettext('Random value generator'),
|
|
||||||
'description': gettext('Generate different random values'),
|
|
||||||
'examples': ['random {}'.format(x) for x in random_types],
|
|
||||||
}
|
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
from functools import reduce
|
|
||||||
from operator import mul
|
|
||||||
|
|
||||||
from flask_babel import gettext
|
|
||||||
|
|
||||||
|
|
||||||
keywords = ('min', 'max', 'avg', 'sum', 'prod')
|
|
||||||
|
|
||||||
|
|
||||||
# required answerer function
|
|
||||||
# can return a list of results (any result type) for a given query
|
|
||||||
def answer(query):
    """required answerer function

    Compute min/max/avg/sum/prod of the numeric arguments in *query*
    (e.g. ``avg 1 2 3``).  Returns a one-element result list, or ``[]``
    when the query is not a valid statistics request.
    """
    parts = query.query.split()

    if len(parts) < 2:
        return []

    try:
        args = list(map(float, parts[1:]))
    # BUG FIX: was a bare ``except:`` which also swallowed SystemExit /
    # KeyboardInterrupt; only a non-numeric argument is expected here.
    except ValueError:
        return []

    func = parts[0]
    # renamed from ``answer`` to avoid shadowing this function's own name
    result = None

    if func == 'min':
        result = min(args)
    elif func == 'max':
        result = max(args)
    elif func == 'avg':
        result = sum(args) / len(args)
    elif func == 'sum':
        result = sum(args)
    elif func == 'prod':
        result = reduce(mul, args, 1)

    # unknown keyword -> not our query
    if result is None:
        return []

    return [{'answer': str(result)}]
|
|
||||||
|
|
||||||
|
|
||||||
# required answerer function
|
|
||||||
# returns information about the answerer
|
|
||||||
def self_info():
    """required answerer function

    Describe this answerer (name, description, usage examples) for the UI.
    """
    func_list = '/'.join(keywords)
    info = dict(
        name=gettext('Statistics functions'),
        description=gettext('Compute {functions} of the arguments').format(functions=func_list),
        examples=['avg 123 548 2.04 24.2'],
    )
    return info
|
|
||||||
@@ -1,228 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""This module implements functions needed for the autocompleter.
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=use-dict-literal
|
|
||||||
|
|
||||||
import json
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
import lxml
|
|
||||||
from httpx import HTTPError
|
|
||||||
|
|
||||||
from searx import settings
|
|
||||||
from searx.engines import (
|
|
||||||
engines,
|
|
||||||
google,
|
|
||||||
)
|
|
||||||
from searx.network import get as http_get
|
|
||||||
from searx.exceptions import SearxEngineResponseException
|
|
||||||
|
|
||||||
|
|
||||||
def get(*args, **kwargs):
    """Thin wrapper around :py:func:`searx.network.get`: applies the
    configured outgoing request timeout (unless the caller set one) and
    always raises on HTTP error responses."""
    kwargs['raise_for_httperror'] = True
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    return http_get(*args, **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
def brave(query, _lang):
|
|
||||||
# brave search autocompleter
|
|
||||||
url = 'https://search.brave.com/api/suggest?'
|
|
||||||
url += urlencode({'q': query})
|
|
||||||
country = 'all'
|
|
||||||
# if lang in _brave:
|
|
||||||
# country = lang
|
|
||||||
kwargs = {'cookies': {'country': country}}
|
|
||||||
resp = get(url, **kwargs)
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
if resp.ok:
|
|
||||||
data = resp.json()
|
|
||||||
for item in data[1]:
|
|
||||||
results.append(item)
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def dbpedia(query, _lang):
    """Autocomplete from the DBpedia keyword-lookup service.

    The *_lang* argument is accepted for backend-signature compatibility
    but not used.
    """
    # dbpedia autocompleter (lookup.dbpedia.org keyword search, XML response)
    autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'

    response = get(autocomplete_url + urlencode(dict(QueryString=query)))

    results = []

    if response.ok:
        # extract the Label texts from the XML result list
        dom = lxml.etree.fromstring(response.content)
        results = dom.xpath('//Result/Label//text()')

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def duckduckgo(query, sxng_locale):
    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""

    eng_traits = engines['duckduckgo'].traits
    params = urlencode({
        'q': query,
        # map the SearXNG locale onto a DuckDuckGo region code
        'kl': eng_traits.get_region(sxng_locale, eng_traits.all_locale),
    })

    resp = get('https://duckduckgo.com/ac/?type=list&' + params)
    if not resp.ok:
        return []

    payload = resp.json()
    # response shape: [query, [suggestion, ...]]
    if len(payload) > 1:
        return payload[1]
    return []
|
|
||||||
|
|
||||||
|
|
||||||
def google_complete(query, sxng_locale):
|
|
||||||
"""Autocomplete from Google. Supports Google's languages and subdomains
|
|
||||||
(:py:obj:`searx.engines.google.get_google_info`) by using the async REST
|
|
||||||
API::
|
|
||||||
|
|
||||||
https://{subdomain}/complete/search?{args}
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)
|
|
||||||
|
|
||||||
url = 'https://{subdomain}/complete/search?{args}'
|
|
||||||
args = urlencode(
|
|
||||||
{
|
|
||||||
'q': query,
|
|
||||||
'client': 'gws-wiz',
|
|
||||||
'hl': google_info['params']['hl'],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
results = []
|
|
||||||
resp = get(url.format(subdomain=google_info['subdomain'], args=args))
|
|
||||||
if resp.ok:
|
|
||||||
json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
|
|
||||||
data = json.loads(json_txt)
|
|
||||||
for item in data[0]:
|
|
||||||
results.append(lxml.html.fromstring(item[0]).text_content())
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def seznam(query, _lang):
|
|
||||||
# seznam search autocompleter
|
|
||||||
url = 'https://suggest.seznam.cz/fulltext/cs?{query}'
|
|
||||||
|
|
||||||
resp = get(
|
|
||||||
url.format(
|
|
||||||
query=urlencode(
|
|
||||||
{'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if not resp.ok:
|
|
||||||
return []
|
|
||||||
|
|
||||||
data = resp.json()
|
|
||||||
return [
|
|
||||||
''.join([part.get('text', '') for part in item.get('text', [])])
|
|
||||||
for item in data.get('result', [])
|
|
||||||
if item.get('itemType', None) == 'ItemType.TEXT'
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def startpage(query, sxng_locale):
|
|
||||||
"""Autocomplete from Startpage. Supports Startpage's languages"""
|
|
||||||
lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
|
|
||||||
url = 'https://startpage.com/suggestions?{query}'
|
|
||||||
resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
|
|
||||||
data = resp.json()
|
|
||||||
return [e['text'] for e in data.get('suggestions', []) if 'text' in e]
|
|
||||||
|
|
||||||
|
|
||||||
def swisscows(query, _lang):
|
|
||||||
# swisscows autocompleter
|
|
||||||
url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
|
|
||||||
|
|
||||||
resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
|
|
||||||
return resp
|
|
||||||
|
|
||||||
|
|
||||||
def qwant(query, sxng_locale):
|
|
||||||
"""Autocomplete from Qwant. Supports Qwant's regions."""
|
|
||||||
results = []
|
|
||||||
|
|
||||||
locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
|
|
||||||
url = 'https://api.qwant.com/v3/suggest?{query}'
|
|
||||||
resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
|
|
||||||
|
|
||||||
if resp.ok:
|
|
||||||
data = resp.json()
|
|
||||||
if data['status'] == 'success':
|
|
||||||
for item in data['data']['items']:
|
|
||||||
results.append(item['value'])
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def wikipedia(query, sxng_locale):
|
|
||||||
"""Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
|
|
||||||
results = []
|
|
||||||
eng_traits = engines['wikipedia'].traits
|
|
||||||
wiki_lang = eng_traits.get_language(sxng_locale, 'en')
|
|
||||||
wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')
|
|
||||||
|
|
||||||
url = 'https://{wiki_netloc}/w/api.php?{args}'
|
|
||||||
args = urlencode(
|
|
||||||
{
|
|
||||||
'action': 'opensearch',
|
|
||||||
'format': 'json',
|
|
||||||
'formatversion': '2',
|
|
||||||
'search': query,
|
|
||||||
'namespace': '0',
|
|
||||||
'limit': '10',
|
|
||||||
}
|
|
||||||
)
|
|
||||||
resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
|
|
||||||
if resp.ok:
|
|
||||||
data = resp.json()
|
|
||||||
if len(data) > 1:
|
|
||||||
results = data[1]
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def yandex(query, _lang):
|
|
||||||
# yandex autocompleter
|
|
||||||
url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
|
|
||||||
|
|
||||||
resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
|
|
||||||
if len(resp) > 1:
|
|
||||||
return resp[1]
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
backends = {
|
|
||||||
'dbpedia': dbpedia,
|
|
||||||
'duckduckgo': duckduckgo,
|
|
||||||
'google': google_complete,
|
|
||||||
'seznam': seznam,
|
|
||||||
'startpage': startpage,
|
|
||||||
'swisscows': swisscows,
|
|
||||||
'qwant': qwant,
|
|
||||||
'wikipedia': wikipedia,
|
|
||||||
'brave': brave,
|
|
||||||
'yandex': yandex,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def search_autocomplete(backend_name, query, sxng_locale):
    """Dispatch *query* to the named autocomplete backend.

    Unknown backend names, HTTP failures and engine response errors all
    degrade to an empty suggestion list.
    """
    try:
        backend = backends[backend_name]
    except KeyError:
        return []
    try:
        return backend(query, sxng_locale)
    except (HTTPError, SearxEngineResponseException):
        return []
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""This module implements the :origin:`searxng_msg <babel.cfg>` extractor to
|
|
||||||
extract messages from:
|
|
||||||
|
|
||||||
- :origin:`searx/searxng.msg`
|
|
||||||
|
|
||||||
The ``searxng.msg`` files are selected by Babel_, see Babel's configuration in
|
|
||||||
:origin:`babel.cfg`::
|
|
||||||
|
|
||||||
searxng_msg = searx.babel_extract.extract
|
|
||||||
...
|
|
||||||
[searxng_msg: **/searxng.msg]
|
|
||||||
|
|
||||||
A ``searxng.msg`` file is a python file that is *executed* by the
|
|
||||||
:py:obj:`extract` function. Additional ``searxng.msg`` files can be added by:
|
|
||||||
|
|
||||||
1. Adding a ``searxng.msg`` file in one of the SearXNG python packages and
|
|
||||||
2. implement a method in :py:obj:`extract` that yields messages from this file.
|
|
||||||
|
|
||||||
.. _Babel: https://babel.pocoo.org/en/latest/index.html
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from os import path
|
|
||||||
|
|
||||||
SEARXNG_MSG_FILE = "searxng.msg"
|
|
||||||
_MSG_FILES = [path.join(path.dirname(__file__), SEARXNG_MSG_FILE)]
|
|
||||||
|
|
||||||
|
|
||||||
def extract(
    # pylint: disable=unused-argument
    fileobj,
    keywords,
    comment_tags,
    options,
):
    """Extract messages from ``searxng.msg`` files by a custom extractor_.

    .. _extractor:
       https://babel.pocoo.org/en/latest/messages.html#writing-extraction-methods
    """
    # only the known, in-repo searxng.msg files may be executed below
    if fileobj.name not in _MSG_FILES:
        raise RuntimeError("don't know how to extract messages from %s" % fileobj.name)

    # run the (trusted, repository-local) msg file and collect its names
    namespace = {}
    exec(fileobj.read(), {}, namespace)  # pylint: disable=exec-used

    # yield Babel extraction tuples: (lineno, funcname, message, comments);
    # funcname '_' marks the message for translation
    for name in namespace['__all__']:
        for k, v in namespace[name].items():
            yield 0, '_', v, ["%s['%s']" % (name, k)]
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
""".. _botdetection src:
|
|
||||||
|
|
||||||
X-Forwarded-For
|
|
||||||
===============
|
|
||||||
|
|
||||||
.. attention::
|
|
||||||
|
|
||||||
A correct setup of the HTTP request headers ``X-Forwarded-For`` and
|
|
||||||
``X-Real-IP`` is essential to be able to assign a request to an IP correctly:
|
|
||||||
|
|
||||||
- `NGINX RequestHeader`_
|
|
||||||
- `Apache RequestHeader`_
|
|
||||||
|
|
||||||
.. _NGINX RequestHeader:
|
|
||||||
https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site
|
|
||||||
.. _Apache RequestHeader:
|
|
||||||
https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site
|
|
||||||
|
|
||||||
.. autofunction:: searx.botdetection.get_real_ip
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from ._helpers import dump_request
|
|
||||||
from ._helpers import get_real_ip
|
|
||||||
from ._helpers import too_many_requests
|
|
||||||
@@ -1,120 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
# pylint: disable=missing-module-docstring, invalid-name
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
IPv4Address,
|
|
||||||
IPv6Address,
|
|
||||||
ip_network,
|
|
||||||
)
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from searx import logger
|
|
||||||
|
|
||||||
logger = logger.getChild('botdetection')
|
|
||||||
|
|
||||||
|
|
||||||
def dump_request(request: flask.Request):
    """Render a one-line summary of *request* (path, proxy headers, form
    data and client headers) for log messages."""
    fields = [
        ('X-Forwarded-For', request.headers.get('X-Forwarded-For')),
        ('X-Real-IP', request.headers.get('X-Real-IP')),
        ('form', request.form),
        ('Accept', request.headers.get('Accept')),
        ('Accept-Language', request.headers.get('Accept-Language')),
        ('Accept-Encoding', request.headers.get('Accept-Encoding')),
        ('Content-Type', request.headers.get('Content-Type')),
        ('Content-Length', request.headers.get('Content-Length')),
        ('Connection', request.headers.get('Connection')),
        ('User-Agent', request.headers.get('User-Agent')),
    ]
    return request.path + ''.join(" || %s: %s" % (name, value) for name, value in fields)
|
|
||||||
|
|
||||||
|
|
||||||
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
    """Build the default ``429 Too Many Requests`` response used by the
    filter methods, logging *log_msg* for the blocked *network* on the
    'botdetection' logger (at debug level)."""

    logger.debug("BLOCK %s: %s", network.compressed, log_msg)
    response = flask.make_response(('Too Many Requests', 429))
    return response
|
|
||||||
|
|
||||||
|
|
||||||
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
    """Returns the (client) network *real_ip* belongs to, using the prefix
    length configured for the IP version (``real_ip.ipv4_prefix`` /
    ``real_ip.ipv6_prefix``)."""
    prefix_key = 'real_ip.ipv6_prefix' if real_ip.version == 6 else 'real_ip.ipv4_prefix'
    network = ip_network(f"{real_ip}/{cfg[prefix_key]}", strict=False)
    # logger.debug("get_network(): %s", network.compressed)
    return network
|
|
||||||
|
|
||||||
|
|
||||||
def get_real_ip(request: flask.Request) -> str:
    """Returns real IP of the request.  Since not all proxies set all the HTTP
    headers and incoming headers can be faked it may happen that the IP cannot
    be determined correctly.

    .. sidebar:: :py:obj:`flask.Request.remote_addr`

       SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).

    This function tries to get the remote IP in the order listed below,
    additionally some consistency tests are done and if inconsistencies or
    errors are detected, they are logged.

    The remote IP of the request is taken from (first match):

    - X-Forwarded-For_ header
    - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
    - :py:obj:`flask.Request.remote_addr`

    .. _ProxyFix:
       https://werkzeug.palletsprojects.com/middleware/proxy_fix/

    .. _X-Forwarded-For:
       https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For

    """
    forwarded_for = request.headers.get("X-Forwarded-For")
    real_ip = request.headers.get('X-Real-IP')
    remote_addr = request.remote_addr

    if not forwarded_for:
        logger.error("X-Forwarded-For header is not set!")
    else:
        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import

        # trust only the entry selected by the configured x_for depth
        x_for: int = get_cfg()['real_ip.x_for']  # type: ignore
        addr_list = [addr.strip() for addr in forwarded_for.split(',')]
        forwarded_for = addr_list[-min(len(addr_list), x_for)]

    if not real_ip:
        logger.error("X-Real-IP header is not set!")

    # cross-check the three sources against each other, log any mismatch
    if forwarded_for and real_ip and forwarded_for != real_ip:
        logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)

    if forwarded_for and remote_addr and forwarded_for != remote_addr:
        logger.warning(
            "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
        )

    if real_ip and remote_addr and real_ip != remote_addr:
        logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)

    # first match wins; '0.0.0.0' when nothing could be determined
    return forwarded_for or real_ip or remote_addr or '0.0.0.0'
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Method ``http_accept``
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
The ``http_accept`` method evaluates a request as the request of a bot if the
|
|
||||||
Accept_ header ..
|
|
||||||
|
|
||||||
- did not contain ``text/html``
|
|
||||||
|
|
||||||
.. _Accept:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
)
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from ._helpers import too_many_requests
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Accept_ header of the request does not
    include ``text/html`` (see module docstring), otherwise ``None``."""
    if 'text/html' in request.accept_mimetypes:
        return None
    return too_many_requests(network, "HTTP header Accept did not contain text/html")
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Method ``http_accept_encoding``
|
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
The ``http_accept_encoding`` method evaluates a request as the request of a
|
|
||||||
bot if the Accept-Encoding_ header ..
|
|
||||||
|
|
||||||
- did not contain ``gzip`` AND ``deflate`` (if both values are missed)
|
|
||||||
- did not contain ``text/html``
|
|
||||||
|
|
||||||
.. _Accept-Encoding:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
)
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from ._helpers import too_many_requests
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Accept-Encoding_ header contains
    neither ``gzip`` nor ``deflate`` (see module docstring), otherwise
    ``None``."""
    encodings = {value.strip() for value in request.headers.get('Accept-Encoding', '').split(',')}
    if encodings.isdisjoint({'gzip', 'deflate'}):
        return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
    return None
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Method ``http_accept_language``
|
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
The ``http_accept_language`` method evaluates a request as the request of a bot
|
|
||||||
if the Accept-Language_ header is unset.
|
|
||||||
|
|
||||||
.. _Accept-Language:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
from __future__ import annotations
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
)
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from ._helpers import too_many_requests
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Accept-Language_ header of the request
    is unset or empty (see module docstring), otherwise ``None``."""
    if not request.headers.get('Accept-Language', '').strip():
        return too_many_requests(network, "missing HTTP header Accept-Language")
    return None
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Method ``http_connection``
|
|
||||||
--------------------------
|
|
||||||
|
|
||||||
The ``http_connection`` method evaluates a request as the request of a bot if
|
|
||||||
the Connection_ header is set to ``close``.
|
|
||||||
|
|
||||||
.. _Connection:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
)
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from ._helpers import too_many_requests
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the Connection_ header of the request is
    set to ``close`` (see module docstring), otherwise ``None``."""
    if request.headers.get('Connection', '').strip() == 'close':
        # fixed: the log message had an unbalanced quote ("'Connection=close")
        return too_many_requests(network, "HTTP header 'Connection=close'")
    return None
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Method ``http_user_agent``
|
|
||||||
--------------------------
|
|
||||||
|
|
||||||
The ``http_user_agent`` method evaluates a request as the request of a bot if
|
|
||||||
the User-Agent_ header is unset or matches the regular expression
|
|
||||||
:py:obj:`USER_AGENT`.
|
|
||||||
|
|
||||||
.. _User-Agent:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
import re
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
)
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from ._helpers import too_many_requests
|
|
||||||
|
|
||||||
|
|
||||||
USER_AGENT = (
    r'('
    + r'unknown'
    + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
    # fixed: 'MJ12bot' was listed twice; the duplicate alternative is removed
    # (a duplicate alternation branch never changes what the regex matches)
    + r'|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
    + r'|ZmEu|BLEXBot|bitlybot'
    # unmaintained Farside instances
    + r'|'
    + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
    # other bots and client to block
    + '|.*PetalBot.*'
    + r')'
)
"""Regular expression that matches to User-Agent_ from known *bots*"""

# compiled pattern cache, populated lazily by regexp_user_agent()
_regexp = None
|
|
||||||
|
|
||||||
|
|
||||||
def regexp_user_agent():
    """Returns the compiled :py:obj:`USER_AGENT` pattern; the pattern is
    compiled once and cached in the module-global ``_regexp``."""
    global _regexp  # pylint: disable=global-statement
    if _regexp is None:
        _regexp = re.compile(USER_AGENT)
    return _regexp
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Returns a 429 response when the User-Agent_ header of the request
    matches the :py:obj:`USER_AGENT` pattern of known bots, otherwise
    ``None``."""
    ua = request.headers.get('User-Agent', 'unknown')
    if regexp_user_agent().match(ua):
        return too_many_requests(network, f"bot detected, HTTP header User-Agent: {ua}")
    return None
|
|
||||||
@@ -1,148 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
""".. _botdetection.ip_limit:
|
|
||||||
|
|
||||||
Method ``ip_limit``
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
The ``ip_limit`` method counts requests from an IP in *sliding windows*. If
there are too many requests in a sliding window, the request is evaluated as a
bot request. This method requires a redis DB and needs a HTTP X-Forwarded-For_
|
|
||||||
header. To take privacy only the hash value of an IP is stored in the redis DB
|
|
||||||
and at least for a maximum of 10 minutes.
|
|
||||||
|
|
||||||
The :py:obj:`.link_token` method can be used to investigate whether a request is
|
|
||||||
*suspicious*. To activate the :py:obj:`.link_token` method in the
|
|
||||||
:py:obj:`.ip_limit` method add the following to your
|
|
||||||
``/etc/searxng/limiter.toml``:
|
|
||||||
|
|
||||||
.. code:: toml
|
|
||||||
|
|
||||||
[botdetection.ip_limit]
|
|
||||||
link_token = true
|
|
||||||
|
|
||||||
If the :py:obj:`.link_token` method is activated and a request is *suspicious*
|
|
||||||
the request rates are reduced:
|
|
||||||
|
|
||||||
- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
|
|
||||||
- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
|
|
||||||
|
|
||||||
To intercept bots that get their IPs from a range of IPs, there is a
|
|
||||||
:py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
|
|
||||||
for a longer time. IPs stored in this sliding window have a maximum of
|
|
||||||
:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
|
|
||||||
makes a request that is not suspicious, the sliding window for this IP is
dropped.
|
|
||||||
|
|
||||||
.. _X-Forwarded-For:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
|
||||||
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
)
|
|
||||||
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
from searx.tools import config
|
|
||||||
|
|
||||||
from searx import redisdb
|
|
||||||
from searx.redislib import incr_sliding_window, drop_counter
|
|
||||||
|
|
||||||
from . import link_token
|
|
||||||
from ._helpers import (
|
|
||||||
too_many_requests,
|
|
||||||
logger,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
logger = logger.getChild('ip_limit')
|
|
||||||
|
|
||||||
BURST_WINDOW = 20
|
|
||||||
"""Time (sec) before sliding window for *burst* requests expires."""
|
|
||||||
|
|
||||||
BURST_MAX = 15
|
|
||||||
"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
|
|
||||||
|
|
||||||
BURST_MAX_SUSPICIOUS = 2
|
|
||||||
"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
|
|
||||||
|
|
||||||
LONG_WINDOW = 600
|
|
||||||
"""Time (sec) before the longer sliding window expires."""
|
|
||||||
|
|
||||||
LONG_MAX = 150
|
|
||||||
"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
|
||||||
|
|
||||||
LONG_MAX_SUSPICIOUS = 10
|
|
||||||
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
|
||||||
|
|
||||||
# NOTE(review): "WONDOW" is a typo for "WINDOW".  The name is part of the
# module's public interface and is embedded in the redis key prefix
# ('ip_limit.API_WONDOW:'), so renaming would break callers and invalidate
# existing counters -- left as-is.
API_WONDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""

API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
|
|
||||||
|
|
||||||
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
|
|
||||||
"""Time (sec) before sliding window for one suspicious IP expires."""
|
|
||||||
|
|
||||||
SUSPICIOUS_IP_MAX = 3
|
|
||||||
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Rate-limits requests per (client) network with redis sliding windows.

    Returns a 429 response (or a 302 redirect to ``/`` for heavily suspicious
    IPs) when a limit is exceeded, otherwise ``None``.  See the module
    docstring for the window/limit semantics.
    """
    # pylint: disable=too-many-return-statements
    redis_client = redisdb.client()

    # link-local networks are skipped unless explicitly configured otherwise
    if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
        logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
        return None

    # API requests (format != html) are counted in their own, longer window
    if request.args.get('format', 'html') != 'html':
        c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
        if c > API_MAX:
            return too_many_requests(network, "too many request in API_WINDOW")

    if cfg['botdetection.ip_limit.link_token']:

        # renew=True: each request of a non-suspicious client refreshes its ping
        suspicious = link_token.is_suspicious(network, request, True)

        if not suspicious:
            # this IP is no longer suspicious: release ip again / delete the counter of this IP
            drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
            return None

        # this IP is suspicious: count requests from this IP
        # NOTE(review): this key (and BURST/LONG below) has no separator
        # between the prefix and the network, unlike the ':' used in the
        # API_WONDOW key above -- kept as-is for key compatibility.
        c = incr_sliding_window(
            redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
        )
        if c > SUSPICIOUS_IP_MAX:
            logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
            return flask.redirect(flask.url_for('index'), code=302)

        # suspicious clients get the reduced BURST/LONG limits
        c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
        if c > BURST_MAX_SUSPICIOUS:
            return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")

        c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
        if c > LONG_MAX_SUSPICIOUS:
            return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")

        return None

    # vanilla limiter without extensions counts BURST_MAX and LONG_MAX
    c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
    if c > BURST_MAX:
        return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")

    c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
    if c > LONG_MAX:
        return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")

    return None
|
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
""".. _botdetection.ip_lists:
|
|
||||||
|
|
||||||
Method ``ip_lists``
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
|
||||||
:py:obj:`pass-lists <pass_ip>`.
|
|
||||||
|
|
||||||
.. code:: toml
|
|
||||||
|
|
||||||
[botdetection.ip_lists]
|
|
||||||
|
|
||||||
pass_ip = [
|
|
||||||
'140.238.172.132', # IPv4 of check.searx.space
|
|
||||||
'192.168.0.0/16', # IPv4 private network
|
|
||||||
'fe80::/10' # IPv6 linklocal
|
|
||||||
]
|
|
||||||
block_ip = [
|
|
||||||
'93.184.216.34', # IPv4 of example.org
|
|
||||||
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
|
||||||
]
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=unused-argument
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
from typing import Tuple
|
|
||||||
from ipaddress import (
|
|
||||||
ip_network,
|
|
||||||
IPv4Address,
|
|
||||||
IPv6Address,
|
|
||||||
)
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from ._helpers import logger
|
|
||||||
|
|
||||||
logger = logger.getChild('ip_limit')
|
|
||||||
|
|
||||||
SEARXNG_ORG = [
|
|
||||||
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
|
|
||||||
'140.238.172.132', # IPv4 check.searx.space
|
|
||||||
'2603:c022:0:4900::/56', # IPv6 check.searx.space
|
|
||||||
]
|
|
||||||
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
|
|
||||||
|
|
||||||
|
|
||||||
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
    """Checks if the IP on the subnet is in one of the members of the
    ``botdetection.ip_lists.pass_ip`` list.  IPs of the SearXNG organization
    (:py:obj:`SEARXNG_ORG`) pass as well, unless
    ``botdetection.ip_lists.pass_searxng_org`` is disabled.
    """
    if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
        for member in SEARXNG_ORG:
            net = ip_network(member, strict=False)
            if net.version == real_ip.version and real_ip in net:
                return True, f"IP matches {net.compressed} in SEARXNG_ORG list."
    return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
|
|
||||||
|
|
||||||
|
|
||||||
def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
    """Checks if the IP on the subnet is in one of the members of the
    ``botdetection.ip_lists.block_ip`` list.
    """
    blocked, msg = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
    if not blocked:
        return blocked, msg
    # blocked IPs get a hint how to appeal appended to the message
    return True, msg + " To remove IP from list, please contact the maintainer of the service."
|
|
||||||
|
|
||||||
|
|
||||||
def ip_is_subnet_of_member_in_list(
    real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
    """Checks if *real_ip* is member of one of the networks of the config
    list *list_name*.

    Returns a tuple ``(match, message)``.  Invalid network values in the
    list are logged (ERROR) and skipped.
    """
    for net in cfg.get(list_name, default=[]):
        try:
            net = ip_network(net, strict=False)
        except ValueError:
            logger.error("invalid IP %s in %s", net, list_name)
            continue
        if real_ip.version == net.version and real_ip in net:
            return True, f"IP matches {net.compressed} in {list_name}."
    # fixed: the message contained a stray 'f' before the placeholder
    # (f"... in the f{list_name} list" rendered e.g. "fblock_ip")
    return False, f"IP is not a member of an item in the {list_name} list"
|
|
||||||
@@ -1,147 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
""".. _limiter src:
|
|
||||||
|
|
||||||
Limiter
|
|
||||||
=======
|
|
||||||
|
|
||||||
.. sidebar:: info
|
|
||||||
|
|
||||||
The limiter requires a :ref:`Redis <settings redis>` database.
|
|
||||||
|
|
||||||
Bot protection / IP rate limitation. The intention of rate limitation is to
|
|
||||||
limit suspicious requests from an IP. The motivation behind this is the fact
|
|
||||||
that SearXNG passes through requests from bots and is thus classified as a bot
|
|
||||||
itself. As a result, the SearXNG engine then receives a CAPTCHA or is blocked
|
|
||||||
by the search engine (the origin) in some other way.
|
|
||||||
|
|
||||||
To avoid blocking, the requests from bots to SearXNG must also be blocked, this
|
|
||||||
is the task of the limiter. To perform this task, the limiter uses the methods
|
|
||||||
from the :py:obj:`searx.botdetection`.
|
|
||||||
|
|
||||||
To enable the limiter activate:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
server:
|
|
||||||
...
|
|
||||||
limiter: true # rate limit the number of request on the instance, block some bots
|
|
||||||
|
|
||||||
and set the redis-url connection. Check the value, it depends on your redis DB
|
|
||||||
(see :ref:`settings redis`), by example:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
redis:
|
|
||||||
url: unix:///usr/local/searxng-redis/run/redis.sock?db=0
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from ipaddress import ip_address
|
|
||||||
import flask
|
|
||||||
import werkzeug
|
|
||||||
|
|
||||||
from searx.tools import config
|
|
||||||
from searx import logger
|
|
||||||
|
|
||||||
from . import (
|
|
||||||
http_accept,
|
|
||||||
http_accept_encoding,
|
|
||||||
http_accept_language,
|
|
||||||
http_connection,
|
|
||||||
http_user_agent,
|
|
||||||
ip_limit,
|
|
||||||
ip_lists,
|
|
||||||
)
|
|
||||||
|
|
||||||
from ._helpers import (
|
|
||||||
get_network,
|
|
||||||
get_real_ip,
|
|
||||||
dump_request,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logger.getChild('botdetection.limiter')
|
|
||||||
|
|
||||||
CFG: config.Config = None # type: ignore
|
|
||||||
|
|
||||||
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
|
||||||
"""Base configuration (schema) of the botdetection."""
|
|
||||||
|
|
||||||
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
"""Local Limiter configuration."""
|
|
||||||
|
|
||||||
CFG_DEPRECATED = {
|
|
||||||
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_cfg() -> config.Config:
    """Returns the limiter configuration (lazily loaded singleton).

    On first call the configuration is built from the schema
    :py:obj:`LIMITER_CFG_SCHEMA` merged with the local file
    :py:obj:`LIMITER_CFG`; subsequent calls return the cached object.
    """
    global CFG  # pylint: disable=global-statement
    if CFG is None:
        CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
    return CFG
|
|
||||||
|
|
||||||
|
|
||||||
def filter_request(request: flask.Request) -> werkzeug.Response | None:
    """Runs the configured bot-detection methods on *request*.

    Returns a response object (429 or redirect) when the request is evaluated
    as a bot request, otherwise ``None`` (the request passes).
    """
    # pylint: disable=too-many-return-statements

    cfg = get_cfg()
    real_ip = ip_address(get_real_ip(request))
    network = get_network(real_ip, cfg)

    # the health check endpoint is never filtered
    if request.path == '/healthz':
        return None

    # link-local
    if network.is_link_local:
        return None

    # block- & pass- lists
    #
    # 1. The IP of the request is first checked against the pass-list; if the IP
    #    matches an entry in the list, the request is not blocked.
    # 2. If no matching entry is found in the pass-list, then a check is made against
    #    the block list; if the IP matches an entry in the list, the request is
    #    blocked.
    # 3. If the IP is not in either list, the request is not blocked.

    match, msg = ip_lists.pass_ip(real_ip, cfg)
    if match:
        logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
        return None

    match, msg = ip_lists.block_ip(real_ip, cfg)
    if match:
        logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
        return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))

    # methods applied on /

    for func in [
        http_user_agent,
    ]:
        val = func.filter_request(network, request, cfg)
        if val is not None:
            return val

    # methods applied on /search

    if request.path == '/search':

        for func in [
            http_accept,
            http_accept_encoding,
            http_accept_language,
            http_connection,
            http_user_agent,
            ip_limit,
        ]:
            val = func.filter_request(network, request, cfg)
            if val is not None:
                return val

    # fixed: log the *request* argument instead of the flask.request proxy
    # and use lazy %-formatting instead of an eager f-string
    logger.debug("OK %s: %s", network, dump_request(request))
    return None
|
|
||||||
@@ -1,157 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Method ``link_token``
|
|
||||||
---------------------
|
|
||||||
|
|
||||||
The ``link_token`` method evaluates a request as :py:obj:`suspicious
|
|
||||||
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
|
|
||||||
client. By adding a random component (the token) in the URL, a bot can not send
|
|
||||||
a ping by request a static URL.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
|
|
||||||
|
|
||||||
To get in use of this method a flask URL route needs to be added:
|
|
||||||
|
|
||||||
.. code:: python
|
|
||||||
|
|
||||||
@app.route('/client<token>.css', methods=['GET', 'POST'])
|
|
||||||
def client_token(token=None):
|
|
||||||
link_token.ping(request, token)
|
|
||||||
return Response('', mimetype='text/css')
|
|
||||||
|
|
||||||
And in the HTML template from flask a stylesheet link is needed (the value of
|
|
||||||
``link_token`` comes from :py:obj:`get_token`):
|
|
||||||
|
|
||||||
.. code:: html
|
|
||||||
|
|
||||||
<link rel="stylesheet"
|
|
||||||
href="{{ url_for('client_token', token=link_token) }}"
|
|
||||||
type="text/css" />
|
|
||||||
|
|
||||||
.. _X-Forwarded-For:
|
|
||||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
|
||||||
|
|
||||||
"""
|
|
||||||
from __future__ import annotations
|
|
||||||
from ipaddress import (
|
|
||||||
IPv4Network,
|
|
||||||
IPv6Network,
|
|
||||||
ip_address,
|
|
||||||
)
|
|
||||||
|
|
||||||
import string
|
|
||||||
import random
|
|
||||||
import flask
|
|
||||||
|
|
||||||
from searx import logger
|
|
||||||
from searx import redisdb
|
|
||||||
from searx.redislib import secret_hash
|
|
||||||
|
|
||||||
from ._helpers import (
|
|
||||||
get_network,
|
|
||||||
get_real_ip,
|
|
||||||
)
|
|
||||||
|
|
||||||
TOKEN_LIVE_TIME = 600
|
|
||||||
"""Livetime (sec) of limiter's CSS token."""
|
|
||||||
|
|
||||||
PING_LIVE_TIME = 3600
|
|
||||||
"""Livetime (sec) of the ping-key from a client (request)"""
|
|
||||||
|
|
||||||
PING_KEY = 'SearXNG_limiter.ping'
|
|
||||||
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
|
|
||||||
|
|
||||||
TOKEN_KEY = 'SearXNG_limiter.token'
|
|
||||||
"""Key for which the current token is stored in the DB"""
|
|
||||||
|
|
||||||
logger = logger.getChild('botdetection.link_token')
|
|
||||||
|
|
||||||
|
|
||||||
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
    """Rates the request as *suspicious* when no valid ping exists for this
    (client) network.  When a valid ping exists and argument ``renew`` is
    ``True``, the expire time of the ping is reset to
    :py:obj:`PING_LIVE_TIME`.
    """
    client = redisdb.client()
    if not client:
        # without a redis DB this method can not rate anything
        return False

    key = get_ping_key(network, request)
    if not client.get(key):
        logger.warning("missing ping (IP: %s) / request: %s", network.compressed, key)
        return True

    if renew:
        client.set(key, 1, ex=PING_LIVE_TIME)

    logger.debug("found ping for (client) network %s -> %s", network.compressed, key)
    return False
|
|
||||||
|
|
||||||
|
|
||||||
def ping(request: flask.Request, token: str):
    """Handles a request to URL ``/client<token>.css``: when *token* is the
    currently valid token, a :py:obj:`PING_KEY` for the client is stored in
    the DB with an expire time of :py:obj:`PING_LIVE_TIME`.
    """
    from . import limiter  # pylint: disable=import-outside-toplevel, cyclic-import

    client = redisdb.client()
    # nothing to store without a redis DB or with an invalid token
    if not client or not token_is_valid(token):
        return

    cfg = limiter.get_cfg()
    real_ip = ip_address(get_real_ip(request))
    network = get_network(real_ip, cfg)

    key = get_ping_key(network, request)
    logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, key)
    client.set(key, 1, ex=PING_LIVE_TIME)
|
|
||||||
|
|
||||||
|
|
||||||
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
    """Generates a hashed key that fits (more or less) to a *WEB-browser
    session* in a network."""
    session_fingerprint = (
        network.compressed
        + request.headers.get('Accept-Language', '')
        + request.headers.get('User-Agent', '')
    )
    return PING_KEY + "[" + secret_hash(session_fingerprint) + "]"
|
|
||||||
|
|
||||||
|
|
||||||
def token_is_valid(token) -> bool:
    """``True`` when *token* equals the currently active token."""
    current = get_token()
    valid = token == current
    logger.debug("token is valid --> %s", valid)
    return valid
|
|
||||||
|
|
||||||
|
|
||||||
def get_token() -> str:
    """Return the current token.  If there is no currently active token a new
    token is generated randomly and stored in the redis DB.

    - :py:obj:`TOKEN_LIVE_TIME`
    - :py:obj:`TOKEN_KEY`

    """
    client = redisdb.client()
    if not client:
        # This function is also called when limiter is inactive / no redis DB
        # (see render function in webapp.py)
        return '12345678'

    stored = client.get(TOKEN_KEY)
    if stored:
        return stored.decode('UTF-8')

    # no active token: create a fresh random one and persist it with a TTL
    alphabet = string.ascii_lowercase + string.digits
    new_token = ''.join(random.choice(alphabet) for _ in range(16))
    client.set(TOKEN_KEY, new_token, ex=TOKEN_LIVE_TIME)
    return new_token
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
# pyright: basic
|
|
||||||
"""Module for backward compatibility.
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=C,R
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = ('cached_property',)
|
|
||||||
|
|
||||||
|
|
||||||
try:
    # Python >= 3.8 ships cached_property in the stdlib.
    from functools import cached_property  # type: ignore

except ImportError:

    # cache_property has been added in py3.8 [1]
    #
    # To support cache_property in py3.7 the implementation from 3.8 has been
    # copied here.  This code can be cleaned up with EOL of py3.7.
    #
    # [1] https://docs.python.org/3/library/functools.html#functools.cached_property

    from threading import RLock

    # sentinel: distinguishes "no cached value yet" from a cached value of None
    _NOT_FOUND = object()

    class cached_property:
        """Backport of :py:obj:`functools.cached_property` (descriptor that
        computes the wrapped method once and caches the result in the
        instance's ``__dict__``)."""

        def __init__(self, func):
            # the decorated (wrapped) function
            self.func = func
            # attribute name; filled in by __set_name__ when the owning class is created
            self.attrname = None
            self.__doc__ = func.__doc__
            # guards the first (cache-filling) call against concurrent threads
            self.lock = RLock()

        def __set_name__(self, owner, name):
            """Record the attribute name this descriptor is bound to."""
            if self.attrname is None:
                self.attrname = name
            elif name != self.attrname:
                # the same descriptor object must not be assigned twice
                raise TypeError(
                    "Cannot assign the same cached_property to two different names "
                    f"({self.attrname!r} and {name!r})."
                )

        def __get__(self, instance, owner=None):
            """Return the cached value; compute and cache it on first access."""
            if instance is None:
                # class-level access returns the descriptor itself
                return self
            if self.attrname is None:
                raise TypeError("Cannot use cached_property instance without calling __set_name__ on it.")
            try:
                cache = instance.__dict__
            except AttributeError:  # not all objects have __dict__ (e.g. class defines slots)
                msg = (
                    f"No '__dict__' attribute on {type(instance).__name__!r} "
                    f"instance to cache {self.attrname!r} property."
                )
                raise TypeError(msg) from None
            val = cache.get(self.attrname, _NOT_FOUND)
            if val is _NOT_FOUND:
                with self.lock:
                    # check if another thread filled cache while we awaited lock
                    val = cache.get(self.attrname, _NOT_FOUND)
                    if val is _NOT_FOUND:
                        val = self.func(instance)
                        try:
                            cache[self.attrname] = val
                        except TypeError:
                            msg = (
                                f"The '__dict__' attribute on {type(instance).__name__!r} instance "
                                f"does not support item assignment for caching {self.attrname!r} property."
                            )
                            raise TypeError(msg) from None
            return val
|
|
||||||
@@ -1,52 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""This module holds the *data* created by::
|
|
||||||
|
|
||||||
make data.all
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# public API of the searx.data package
__all__ = [
    'ENGINE_TRAITS',
    'CURRENCIES',
    'USER_AGENTS',
    'EXTERNAL_URLS',
    'WIKIDATA_UNITS',
    'EXTERNAL_BANGS',
    'OSM_KEYS_TAGS',
    'ENGINE_DESCRIPTIONS',
    'ahmia_blacklist_loader',
]

import json
from pathlib import Path

# folder holding the JSON data files shipped with this package
data_dir = Path(__file__).parent
|
|
||||||
|
|
||||||
|
|
||||||
def _load(filename):
    """Read the JSON file *filename* (relative to :py:obj:`data_dir`) and
    return the parsed content."""
    json_path = data_dir / filename
    with open(json_path, encoding='utf-8') as json_file:
        return json.load(json_file)
|
|
||||||
|
|
||||||
|
|
||||||
def ahmia_blacklist_loader():
    """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
    names.  The MD5 values are fetched by::

        searxng_extra/update/update_ahmia_blacklist.py

    This function is used by :py:mod:`searx.plugins.ahmia_filter`.

    """
    blacklist_path = data_dir / 'ahmia_blacklist.txt'
    with open(blacklist_path, encoding='utf-8') as blacklist_file:
        # one MD5 per whitespace-separated token
        return blacklist_file.read().split()
|
|
||||||
|
|
||||||
|
|
||||||
# Data sets generated by ``make data.all``, loaded once at import time.
CURRENCIES = _load('currencies.json')
USER_AGENTS = _load('useragents.json')
EXTERNAL_URLS = _load('external_urls.json')
WIKIDATA_UNITS = _load('wikidata_units.json')
EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
|
|
||||||
@@ -1,145 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Implementations of the framework for the SearXNG engines.
|
|
||||||
|
|
||||||
.. hint::
|
|
||||||
|
|
||||||
The long term goal is to modularize all implementations of the engine
|
|
||||||
framework here in this Python package. ToDo:
|
|
||||||
|
|
||||||
- move implementations of the :ref:`searx.engines loader` to a new module in
|
|
||||||
the :py:obj:`searx.enginelib` namespace.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
from typing import List, Callable, TYPE_CHECKING
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from searx.enginelib import traits
|
|
||||||
|
|
||||||
|
|
||||||
class Engine:  # pylint: disable=too-few-public-methods
    """Class of engine instances built from YAML settings.

    Further documentation see :ref:`general engine configuration`.

    .. hint::

       This class is currently never initialized and only used for type hinting.
    """

    # Common options in the engine module

    engine_type: str
    """Type of the engine (:ref:`searx.search.processors`)"""

    paging: bool
    """Engine supports multiple pages."""

    time_range_support: bool
    """Engine supports search time range."""

    safesearch: bool
    """Engine supports SafeSearch"""

    language_support: bool
    """Engine supports languages (locales) search."""

    language: str
    """For an engine, when there is ``language: ...`` in the YAML settings the engine
    does support only this one language:

    .. code:: yaml

      - name: google french
        engine: google
        language: fr
    """

    region: str
    """For an engine, when there is ``region: ...`` in the YAML settings the engine
    does support only this one region::

    .. code:: yaml

      - name: google belgium
        engine: google
        region: fr-BE
    """

    fetch_traits: Callable
    """Function to fetch engine's traits from origin."""

    traits: traits.EngineTraits
    """Traits of the engine."""

    # settings.yml

    categories: List[str]
    """Specifies to which :ref:`engine categories` the engine should be added."""

    name: str
    """Name that will be used across SearXNG to define this engine. In settings, on
    the result page .."""

    engine: str
    """Name of the python file used to handle requests and responses to and from
    this search engine (file name from :origin:`searx/engines` without
    ``.py``)."""

    enable_http: bool
    """Enable HTTP (by default only HTTPS is enabled)."""

    shortcut: str
    """Code used to execute bang requests (``!foo``)"""

    timeout: float
    """Specific timeout for search-engine."""

    display_error_messages: bool
    """Display error messages on the web UI."""

    proxies: dict
    """Set proxies for a specific engine (YAML):

    .. code:: yaml

       proxies:
         http: socks5://proxy:port
         https: socks5://proxy:port
    """

    disabled: bool
    """To disable by default the engine, but not deleting it. It will allow the
    user to manually activate it in the settings."""

    inactive: bool
    """Remove the engine from the settings (*disabled & removed*)."""

    about: dict
    """Additional fields describing the engine.

    .. code:: yaml

       about:
         website: https://example.com
         wikidata_id: Q306656
         official_api_documentation: https://example.com/api-doc
         use_official_api: true
         require_api_key: true
         results: HTML
    """

    using_tor_proxy: bool
    """Using tor proxy (``true``) or not (``false``) for this engine."""

    send_accept_language_header: bool
    """When this option is activated, the language (locale) that is selected by
    the user is used to build and send a ``Accept-Language`` header in the
    request to the origin search engine."""

    tokens: List[str]
    """A list of secret tokens to make this engine *private*, more details see
    :ref:`private engines`."""
|
|
||||||
@@ -1,252 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Engine's traits are fetched from the origin engines and stored in a JSON file
|
|
||||||
in the *data folder*. Most often traits are languages and region codes and
|
|
||||||
their mapping from SearXNG's representation to the representation in the origin
|
|
||||||
search engine. For new traits new properties can be added to the class
|
|
||||||
:py:class:`EngineTraits`.
|
|
||||||
|
|
||||||
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
|
|
||||||
used.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
import json
|
|
||||||
import dataclasses
|
|
||||||
import types
|
|
||||||
from typing import Dict, Iterable, Union, Callable, Optional, TYPE_CHECKING
|
|
||||||
from typing_extensions import Literal, Self
|
|
||||||
|
|
||||||
from searx import locales
|
|
||||||
from searx.data import data_dir, ENGINE_TRAITS
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from . import Engine
|
|
||||||
|
|
||||||
|
|
||||||
class EngineTraitsEncoder(json.JSONEncoder):
    """Encodes :class:`EngineTraits` to a serializable object, see
    :class:`json.JSONEncoder`."""

    def default(self, o):
        """Return dictionary of a :class:`EngineTraits` object."""
        if not isinstance(o, EngineTraits):
            # fall back to the base class (raises TypeError for unknown types)
            return super().default(o)
        return o.__dict__
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
class EngineTraits:
    """The class is intended to be instantiated for each engine."""

    regions: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a region to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       regions = {
           'fr-BE' : <engine's region name>,
       }

       for key, engine_region in regions.items():
           searxng_region = babel.Locale.parse(key, sep='-')
           ...
    """

    languages: Dict[str, str] = dataclasses.field(default_factory=dict)
    """Maps SearXNG's internal representation of a language to the one of the engine.

    SearXNG's internal representation can be parsed by babel and the value is
    sent to the engine:

    .. code:: python

       languages = {
           'ca' : <engine's language name>,
       }

       for key, engine_lang in languages.items():
           searxng_lang = babel.Locale.parse(key)
           ...
    """

    all_locale: Optional[str] = None
    """To which locale value SearXNG's ``all`` language is mapped (shown as
    "Default language").
    """

    data_type: Literal['traits_v1'] = 'traits_v1'
    """Data type, default is 'traits_v1'.
    """

    custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
    """A place to store engine's custom traits, not related to the SearXNG core.
    """

    def get_language(self, searxng_locale: str, default=None):
        """Return engine's language string that *best fits* to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default language

        The *best fits* rules are implemented in
        :py:obj:`searx.locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.languages, default=default)

    def get_region(self, searxng_locale: str, default=None):
        """Return engine's region string that best fits to SearXNG's locale.

        :param searxng_locale: SearXNG's internal representation of locale
          selected by the user.

        :param default: engine's default region

        The *best fits* rules are implemented in
        :py:obj:`searx.locales.get_engine_locale`.  Except for the special value ``all``
        which is determined from :py:obj:`EngineTraits.all_locale`.
        """
        if searxng_locale == 'all' and self.all_locale is not None:
            return self.all_locale
        return locales.get_engine_locale(searxng_locale, self.regions, default=default)

    def is_locale_supported(self, searxng_locale: str) -> bool:
        """A *locale* (SearXNG's internal representation) is considered to be
        supported by the engine if the *region* or the *language* is supported
        by the engine.

        For verification the functions :py:func:`EngineTraits.get_region` and
        :py:func:`EngineTraits.get_language` are used.
        """
        if self.data_type == 'traits_v1':
            return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))

        raise TypeError('engine traits of type %s is unknown' % self.data_type)

    def copy(self):
        """Create a copy of the dataclass object."""
        return EngineTraits(**dataclasses.asdict(self))

    @classmethod
    def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
        """Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
        and set properties from the origin engine in the object ``engine_traits``.  If
        the function does not exist, ``None`` is returned.
        """

        fetch_traits = getattr(engine, 'fetch_traits', None)
        engine_traits = None

        if fetch_traits:
            engine_traits = cls()
            fetch_traits(engine_traits)
        return engine_traits

    def set_traits(self, engine: Engine):
        """Set traits from self object in a :py:obj:`.Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """

        if self.data_type == 'traits_v1':
            self._set_traits_v1(engine)
        else:
            raise TypeError('engine traits of type %s is unknown' % self.data_type)

    def _set_traits_v1(self, engine: Engine):
        # For an engine, when there is `language: ...` in the YAML settings the engine
        # does support only this one language (region)::
        #
        #   - name: google italian
        #     engine: google
        #     language: it
        #     region: it-IT

        # work on a copy: the traits in `self` are shared between all engines
        # configured for the same origin engine
        traits = self.copy()

        _msg = "settings.yml - engine: '%s' / %s: '%s' not supported"

        languages = traits.languages
        if hasattr(engine, 'language'):
            if engine.language not in languages:
                raise ValueError(_msg % (engine.name, 'language', engine.language))
            traits.languages = {engine.language: languages[engine.language]}

        regions = traits.regions
        if hasattr(engine, 'region'):
            if engine.region not in regions:
                raise ValueError(_msg % (engine.name, 'region', engine.region))
            traits.regions = {engine.region: regions[engine.region]}

        engine.language_support = bool(traits.languages or traits.regions)

        # set the copied & modified traits in engine's namespace
        engine.traits = traits
|
|
||||||
|
|
||||||
|
|
||||||
class EngineTraitsMap(Dict[str, EngineTraits]):
    """A python dictionary to map :class:`EngineTraits` by engine name."""

    ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
    """File with persistence of the :py:obj:`EngineTraitsMap`."""

    def save_data(self):
        """Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
        with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
            json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)

    @classmethod
    def from_data(cls) -> Self:
        """Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
        obj = cls()
        for k, v in ENGINE_TRAITS.items():
            obj[k] = EngineTraits(**v)
        return obj

    @classmethod
    def fetch_traits(cls, log: Callable) -> Self:
        """Fetch traits of all registered engines and return them in a new
        :class:`EngineTraitsMap`; progress is reported via ``log``."""
        from searx import engines  # pylint: disable=cyclic-import, import-outside-toplevel

        names = list(engines.engines)
        names.sort()
        obj = cls()

        for engine_name in names:
            engine = engines.engines[engine_name]

            # engines without a ``fetch_traits`` function return None and are skipped
            traits = EngineTraits.fetch_traits(engine)
            if traits is not None:
                log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
                log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
                obj[engine_name] = traits

        return obj

    def set_traits(self, engine: Engine | types.ModuleType):
        """Set traits in a :py:obj:`Engine` namespace.

        :param engine: engine instance build by :py:func:`searx.engines.load_engine`
        """

        engine_traits = EngineTraits(data_type='traits_v1')
        if engine.name in self.keys():
            engine_traits = self[engine.name]

        elif engine.engine in self.keys():
            # The key of the dictionary traits_map is the *engine name*
            # configured in settings.xml.  When multiple engines are configured
            # in settings.yml to use the same origin engine (python module)
            # these additional engines can use the languages from the origin
            # engine.  For this use the configured ``engine: ...`` from
            # settings.yml
            engine_traits = self[engine.engine]

        engine_traits.set_traits(engine)
|
|
||||||
@@ -1,57 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
1337x
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import quote, urljoin
|
|
||||||
from lxml import html
|
|
||||||
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://1337x.to/',
    "wikidata_id": 'Q28134166',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine configuration: base URL and search URL template
# ({search_term} is URL-quoted, {pageno} is the 1-based page number)
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'
categories = ['files']
paging = True
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the 1337x search URL for ``query`` and the requested page."""
    page = params['pageno']
    params['url'] = search_url.format(search_term=quote(query), pageno=page)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the 1337x HTML result page into a list of torrent results."""
    results = []

    dom = html.fromstring(resp.text)

    # each table row of the result list is one torrent
    for result in eval_xpath_list(dom, '//table[contains(@class, "table-list")]/tbody//tr'):
        # second anchor in the "name" cell links to the torrent detail page
        href = urljoin(url, eval_xpath_getindex(result, './td[contains(@class, "name")]/a[2]/@href', 0))
        title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]'))
        seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]'))
        leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]'))
        # size cell contains e.g. "1.2 GB" -> value and unit
        filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))
        filesize, filesize_multiplier = filesize_info.split()
        filesize = get_torrent_size(filesize, filesize_multiplier)

        results.append(
            {
                'url': href,
                'title': title,
                'seed': seed,
                'leech': leech,
                'filesize': filesize,
                'template': 'torrent.html',
            }
        )

    return results
|
|
||||||
@@ -1,77 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
# pylint: disable=invalid-name
|
|
||||||
"""9GAG (social media)"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from datetime import datetime
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
about = {
    "website": 'https://9gag.com/',
    "wikidata_id": 'Q277421',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['social media']
paging = True

# search API endpoint; {query} is the urlencoded query string
search_url = "https://9gag.com/v1/search-posts?{query}"
# number of results per page (used to compute the paging offset)
page_size = 10
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble the paged search request for the 9GAG API."""
    offset = (params['pageno'] - 1) * page_size
    args = urlencode({'query': query, 'c': offset})
    params['url'] = search_url.format(query=args)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the 9GAG JSON answer into SearXNG result dicts (images, videos
    and tag suggestions)."""
    data = loads(resp.text)['data']
    entries = []

    for post in data['posts']:
        images = post['images']

        # Get the not cropped version of the thumbnail when the image height
        # is not too important
        if images['image700']['height'] > 400:
            preview = images['imageFbThumbnail']['url']
        else:
            preview = images['image700']['url']

        common = {
            'url': post['url'],
            'title': post['title'],
            'content': post['description'],
            'publishedDate': datetime.utcfromtimestamp(post['creationTs']),
        }

        kind = post['type']
        if kind == 'Photo':
            entries.append(
                dict(
                    common,
                    template='images.html',
                    img_src=images['image700']['url'],
                    thumbnail_src=preview,
                )
            )
        elif kind == 'Animated':
            entries.append(
                dict(
                    common,
                    template='videos.html',
                    thumbnail=preview,
                    iframe_src=images.get('image460sv', {}).get('url'),
                )
            )

    if 'tags' in data:
        for tag in data['tags']:
            entries.append({'suggestion': tag['key']})

    return entries
|
|
||||||
@@ -1,253 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
|
|
||||||
:py:obj:`engine_shortcuts`.
|
|
||||||
|
|
||||||
usage::
|
|
||||||
|
|
||||||
load_engines( settings['engines'] )
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import copy
|
|
||||||
from os.path import realpath, dirname
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING, Dict
|
|
||||||
import types
|
|
||||||
import inspect
|
|
||||||
|
|
||||||
from searx import logger, settings
|
|
||||||
from searx.utils import load_module
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from searx.enginelib import Engine
|
|
||||||
|
|
||||||
logger = logger.getChild('engines')
# directory holding the engine modules (searx/engines)
ENGINE_DIR = dirname(realpath(__file__))
# default value for every attribute an engine namespace may miss,
# see :py:func:`update_engine_attributes`
ENGINE_DEFAULT_ARGS = {
    # Common options in the engine module
    "engine_type": "online",
    "paging": False,
    "time_range_support": False,
    "safesearch": False,
    # settings.yml
    "categories": ["general"],
    "enable_http": False,
    "shortcut": "-",
    "timeout": settings["outgoing"]["request_timeout"],
    "display_error_messages": True,
    "disabled": False,
    "inactive": False,
    "about": {},
    "using_tor_proxy": False,
    "send_accept_language_header": False,
    "tokens": [],
}
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'


# Defaults for the namespace of an engine module, see :py:func:`load_engine`

categories = {'general': []}
engines: Dict[str, Engine | types.ModuleType] = {}
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).

::

    engine_shortcuts[engine.shortcut] = engine.name

:meta hide-value:
"""
|
|
||||||
|
|
||||||
|
|
||||||
def check_engine_module(module: types.ModuleType):
    """Probe *module* for unintentional name collisions, for example name
    collisions caused by import statements in the engine module.

    :raises TypeError: when a collision is detected.
    """
    # network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
    network_attr = getattr(module, 'network', None)
    if network_attr is not None and inspect.ismodule(network_attr):
        msg = f'type of {module.__name__}.network is a module ({network_attr.__name__}), expected a string'
        raise TypeError(msg)
|
|
||||||
|
|
||||||
|
|
||||||
def load_engine(engine_data: dict) -> Engine | types.ModuleType | None:
    """Load engine from ``engine_data``.

    :param dict engine_data:  Attributes from YAML ``settings:engines/<engine>``
    :return: initialized namespace of the ``<engine>``.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`
    3. update namespace with values from ``engine_data``

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.

    This function also returns ``None`` if initialization of the namespace fails
    for one of the following reasons:

    - engine name contains underscore
    - engine name is not lowercase
    - required attribute is not set :py:func:`is_missing_required_attributes`

    """
    # pylint: disable=too-many-return-statements

    engine_name = engine_data.get('name')
    if engine_name is None:
        logger.error('An engine does not have a "name" field')
        return None
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None

    if engine_name.lower() != engine_name:
        # not fatal: the name is normalized (in engine_data too) and loading continues
        logger.warning('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    # load_module
    module_name = engine_data.get('engine')
    if module_name is None:
        logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
        return None
    try:
        engine = load_module(module_name + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        # these errors indicate a broken installation / engine file -> abort startup
        logger.exception('Fatal exception in engine "{}"'.format(module_name))
        sys.exit(1)
    except BaseException:
        # any other failure only disables this one engine
        logger.exception('Cannot load engine "{}"'.format(module_name))
        return None

    check_engine_module(engine)
    update_engine_attributes(engine, engine_data)
    update_attributes_for_tor(engine)

    # avoid cyclic imports
    # pylint: disable=import-outside-toplevel
    from searx.enginelib.traits import EngineTraitsMap

    trait_map = EngineTraitsMap.from_data()
    trait_map.set_traits(engine)

    if not is_engine_active(engine):
        return None

    if is_missing_required_attributes(engine):
        return None

    set_loggers(engine, engine_name)

    # engines without a tab category are grouped under DEFAULT_CATEGORY
    if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
        engine.categories.append(DEFAULT_CATEGORY)

    return engine
|
|
||||||
|
|
||||||
|
|
||||||
def set_loggers(engine, engine_name):
    """Attach a child logger to *engine* and to any engine modules that were
    imported without one."""
    engine.logger = logger.getChild(engine_name)
    # Loading one engine may import further engine modules; make sure each of
    # them also gets a logger.  Iterate over a snapshot (sys.modules.copy())
    # to avoid "RuntimeError: dictionary changed size during iteration",
    # see https://github.com/python/cpython/issues/89516
    # and https://docs.python.org/3.10/library/sys.html#sys.modules
    for mod_name, mod in sys.modules.copy().items():
        if not mod_name.startswith("searx.engines"):
            continue
        if mod_name == "searx.engines.__init__" or hasattr(mod, "logger"):
            continue
        mod.logger = logger.getChild(mod_name.split(".")[-1])  # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
    """Copy the settings from *engine_data* onto *engine*, then fill in every
    attribute that is still unset with its default from ENGINE_DEFAULT_ARGS."""
    for key, value in engine_data.items():
        if key == 'categories':
            # a comma separated string becomes a list of stripped names
            if isinstance(value, str):
                value = [category.strip() for category in value.split(',')]
            engine.categories = value  # type: ignore
        elif key == 'about' and hasattr(engine, 'about'):
            # merge instead of replace; engine_data wins on conflicting keys
            engine.about = {**engine.about, **engine_data['about']}  # type: ignore
        else:
            setattr(engine, key, value)

    # defaults are deep-copied so engines never share mutable state
    for default_name, default_value in ENGINE_DEFAULT_ARGS.items():
        if not hasattr(engine, default_name):
            setattr(engine, default_name, copy.deepcopy(default_value))
|
|
||||||
|
|
||||||
|
|
||||||
def update_attributes_for_tor(engine: Engine | types.ModuleType):
    """Rewrite the search URL and timeout of an onion-capable engine when Tor
    is in use."""
    if not (using_tor_proxy(engine) and hasattr(engine, 'onion_url')):
        return
    # point the engine at its onion service instead of the clearnet URL
    engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')  # type: ignore
    # onion requests are slow; grant the configured extra time
    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
def is_missing_required_attributes(engine):
    """An attribute is required when its name doesn't start with ``_`` (underline).

    Required attributes must not be ``None``.

    """
    found_missing = False
    for attr_name in dir(engine):
        if attr_name.startswith('_'):
            continue
        if getattr(engine, attr_name) is None:
            # report every missing attribute, not just the first one
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, attr_name))
            found_missing = True
    return found_missing
|
|
||||||
|
|
||||||
|
|
||||||
def using_tor_proxy(engine: Engine | types.ModuleType):
    """Return True if the engine configuration declares to use Tor."""
    # a global outgoing setting wins over the per-engine flag
    globally_enabled = settings['outgoing'].get('using_tor_proxy')
    return globally_enabled or getattr(engine, 'using_tor_proxy', False)
|
|
||||||
|
|
||||||
|
|
||||||
def is_engine_active(engine: Engine | types.ModuleType):
    """Return ``False`` when the engine is switched off or cannot be reached."""
    if engine.inactive is True:
        # explicitly disabled in the settings
        return False
    if 'onions' not in engine.categories:
        return True
    # onion engines can only be reached through a Tor proxy
    return bool(using_tor_proxy(engine))
|
|
||||||
|
|
||||||
|
|
||||||
def register_engine(engine: Engine | types.ModuleType):
    """Add *engine* to the global registries; abort on ambiguous names or
    shortcuts, since silently overwriting an engine would hide a config bug."""
    name = engine.name
    if name in engines:
        logger.error('Engine config error: ambiguous name: {0}'.format(name))
        sys.exit(1)
    engines[name] = engine

    shortcut = engine.shortcut
    if shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(shortcut))
        sys.exit(1)
    engine_shortcuts[shortcut] = name

    # an engine is listed under each of its categories
    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)
|
|
||||||
|
|
||||||
|
|
||||||
def load_engines(engine_list):
    """usage: ``engine_list = settings['engines']``"""
    # start from a clean slate so the function can be called repeatedly
    # (e.g. on a settings reload)
    engines.clear()
    engine_shortcuts.clear()
    categories.clear()
    categories['general'] = []
    for engine_data in engine_list:
        loaded = load_engine(engine_data)
        # load_engine() returns None for broken / inactive engines
        if loaded:
            register_engine(loaded)
    return engines
|
|
||||||
@@ -1,80 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
Ahmia (Onions)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urlencode, urlparse, parse_qs
|
|
||||||
from lxml.html import fromstring
|
|
||||||
from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
|
|
||||||
|
|
||||||
# about
|
|
||||||
about = {
|
|
||||||
"website": 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion',
|
|
||||||
"wikidata_id": 'Q18693938',
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'HTML',
|
|
||||||
}
|
|
||||||
|
|
||||||
# engine config
|
|
||||||
categories = ['onions']
|
|
||||||
paging = True
|
|
||||||
page_size = 10
|
|
||||||
|
|
||||||
# search url
|
|
||||||
search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}'
|
|
||||||
time_range_support = True
|
|
||||||
time_range_dict = {'day': 1, 'week': 7, 'month': 30}
|
|
||||||
|
|
||||||
# xpaths
|
|
||||||
results_xpath = '//li[@class="result"]'
|
|
||||||
url_xpath = './h4/a/@href'
|
|
||||||
title_xpath = './h4/a[1]'
|
|
||||||
content_xpath = './/p[1]'
|
|
||||||
correction_xpath = '//*[@id="didYouMean"]//a'
|
|
||||||
number_of_results_xpath = '//*[@id="totalResults"]'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the Ahmia request URL, optionally narrowed by time range."""
    url = search_url.format(query=urlencode({'q': query}))
    time_range = params['time_range']
    # Ahmia expresses the time range as a number of days ('d' parameter)
    if time_range in time_range_dict:
        url = url + '&' + urlencode({'d': time_range_dict[time_range]})
    params['url'] = url
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the Ahmia result page.

    Ahmia returns all results on a single page; the list is sliced locally
    according to ``pageno`` so SearXNG sees ``page_size`` results per page.
    Besides the regular results, spelling corrections and the total result
    count are extracted when present.
    """
    results = []
    dom = fromstring(resp.text)

    # trim results so there's not way too many at once
    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
    all_results = eval_xpath_list(dom, results_xpath)
    trimmed_results = all_results[first_result_index : first_result_index + page_size]

    # get results
    for result in trimmed_results:
        # remove ahmia url and extract the actual url for the result:
        # Ahmia links redirect through its own host, the target is in the
        # 'redirect_url' query parameter
        raw_url = extract_url(eval_xpath_list(result, url_xpath, min_len=1), search_url)
        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]

        title = extract_text(eval_xpath(result, title_xpath))
        content = extract_text(eval_xpath(result, content_xpath))

        results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True})

    # get spelling corrections
    for correction in eval_xpath_list(dom, correction_xpath):
        results.append({'correction': extract_text(correction)})

    # get number of results
    number_of_results = eval_xpath(dom, number_of_results_xpath)
    if number_of_results:
        try:
            results.append({'number_of_results': int(extract_text(number_of_results))})
        except (TypeError, ValueError):
            # the counter element did not contain a parsable integer; the
            # previous bare ``except`` also silenced KeyboardInterrupt and
            # SystemExit, which must propagate
            pass

    return results
|
|
||||||
@@ -1,187 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""`Anna's Archive`_ is a free non-profit online shadow library metasearch
|
|
||||||
engine providing access to a variety of book resources (also via IPFS), created
|
|
||||||
by a team of anonymous archivists (AnnaArchivist_).
|
|
||||||
|
|
||||||
.. _Anna's Archive: https://annas-archive.org/
|
|
||||||
.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive
|
|
||||||
|
|
||||||
Configuration
|
|
||||||
=============
|
|
||||||
|
|
||||||
The engine has the following additional settings:
|
|
||||||
|
|
||||||
- :py:obj:`aa_content`
|
|
||||||
- :py:obj:`aa_ext`
|
|
||||||
- :py:obj:`aa_sort`
|
|
||||||
|
|
||||||
With this options a SearXNG maintainer is able to configure **additional**
|
|
||||||
engines for specific searches in Anna's Archive. For example a engine to search
|
|
||||||
for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: annas articles
|
|
||||||
engine: annas_archive
|
|
||||||
shortcut: aaa
|
|
||||||
aa_content: 'journal_article'
|
|
||||||
aa_ext: 'pdf'
|
|
||||||
aa_sort: 'newest'
|
|
||||||
|
|
||||||
Implementations
|
|
||||||
===============
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import List, Dict, Any, Optional
|
|
||||||
from urllib.parse import quote
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.utils import extract_text, eval_xpath, eval_xpath_list
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
from searx.data import ENGINE_TRAITS
|
|
||||||
|
|
||||||
# about
|
|
||||||
about: Dict[str, Any] = {
|
|
||||||
"website": "https://annas-archive.org/",
|
|
||||||
"wikidata_id": "Q115288326",
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": "HTML",
|
|
||||||
}
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories: List[str] = ["files"]
|
|
||||||
paging: bool = False
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url: str = "https://annas-archive.org"
|
|
||||||
aa_content: str = ""
|
|
||||||
"""Anan's search form field **Content** / possible values::
|
|
||||||
|
|
||||||
journal_article, book_any, book_fiction, book_unknown, book_nonfiction,
|
|
||||||
book_comic, magazine, standards_document
|
|
||||||
|
|
||||||
To not filter use an empty string (default).
|
|
||||||
"""
|
|
||||||
aa_sort: str = ''
|
|
||||||
"""Sort Anna's results, possible values::
|
|
||||||
|
|
||||||
newest, oldest, largest, smallest
|
|
||||||
|
|
||||||
To sort by *most relevant* use an empty string (default)."""
|
|
||||||
|
|
||||||
aa_ext: str = ''
|
|
||||||
"""Filter Anna's results by a file ending. Common filters for example are
|
|
||||||
``pdf`` and ``epub``.
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
Anna's Archive is a beta release: Filter results by file extension does not
|
|
||||||
really work on Anna's Archive.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings=None):  # pylint: disable=unused-argument
    """Check of engine's settings."""
    traits = EngineTraits(**ENGINE_TRAITS['annas archive'])

    # every configured filter value must be one that Anna's search form offers
    for setting_name, setting_value in (('content', aa_content), ('sort', aa_sort), ('ext', aa_ext)):
        if setting_value and setting_value not in traits.custom[setting_name]:
            raise ValueError(f'invalid setting {setting_name}: {setting_value}')
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the Anna's Archive search URL from the configured filters."""
    lang = traits.get_language(params["language"], traits.all_locale)  # type: ignore
    search_args = f"lang={lang or ''}&content={aa_content}&ext={aa_ext}&sort={aa_sort}&q={quote(query)}"
    params["url"] = f"{base_url}/search?{search_args}"
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp) -> List[Dict[str, Optional[str]]]:
    """Parse Anna's Archive result page into SearXNG results."""
    results: List[Dict[str, Optional[str]]] = []
    dom = html.fromstring(resp.text)

    # only the first result is rendered as plain HTML ...
    for item in eval_xpath_list(dom, '//main//div[contains(@class, "h-[125]")]/a'):
        results.append(_get_result(item))

    # ... all following results are enclosed in SGML comments which are
    # *uncommented* by some JS code on the client, see query of class
    # '.js-scroll-hidden' in Anna's HTML template:
    # https://annas-software.org/AnnaArchivist/annas-archive/-/blob/main/allthethings/templates/macros/md5_list.html
    for hidden in eval_xpath_list(dom, '//main//div[contains(@class, "js-scroll-hidden")]'):
        commented_markup = hidden.xpath('./comment()')[0].text
        results.append(_get_result(html.fromstring(commented_markup)))

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def _get_result(item):
    """Map one result anchor from Anna's DOM onto a SearXNG paper result."""
    publisher = extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]'))
    author = extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))
    return {
        'template': 'paper.html',
        'url': base_url + item.xpath('./@href')[0],
        'title': extract_text(eval_xpath(item, './/h3/text()[1]')),
        'publisher': publisher,
        'authors': [author],
        'content': extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]')),
        'img_src': item.xpath('.//img/@src')[0],
    }
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and other search arguments from Anna's search form."""
    # pylint: disable=import-outside-toplevel

    import babel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
    from searx.locales import language_tag

    # start from a clean slate: no "all languages" value, empty filter lists
    engine_traits.all_locale = ''
    engine_traits.custom['content'] = []
    engine_traits.custom['ext'] = []
    engine_traits.custom['sort'] = []

    resp = get(base_url + '/search')
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from Anna's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    # supported language codes

    lang_map = {}
    for x in eval_xpath_list(dom, "//form//select[@name='lang']//option"):
        eng_lang = x.get("value")
        # skip placeholder and non-language option values of the <select>
        if eng_lang in ('', '_empty', 'nl-BE', 'und'):
            continue
        try:
            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
        except babel.UnknownLocaleError:
            # silently ignore unknown languages
            # print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
            continue
        sxng_lang = language_tag(locale)
        conflict = engine_traits.languages.get(sxng_lang)
        if conflict:
            # two of Anna's language codes map onto the same SearXNG tag;
            # keep the first mapping and only report real mismatches
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_lang] = eng_lang

    # collect the raw option values of the content / ext / sort selects;
    # they are validated against the configured aa_* settings in init()
    for x in eval_xpath_list(dom, "//form//select[@name='content']//option"):
        engine_traits.custom['content'].append(x.get("value"))

    for x in eval_xpath_list(dom, "//form//select[@name='ext']//option"):
        engine_traits.custom['ext'].append(x.get("value"))

    for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
        engine_traits.custom['sort'].append(x.get("value"))
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""APKMirror
|
|
||||||
"""
|
|
||||||
|
|
||||||
# pylint: disable=invalid-name
|
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.utils import (
|
|
||||||
eval_xpath_list,
|
|
||||||
eval_xpath_getindex,
|
|
||||||
extract_text,
|
|
||||||
)
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.apkmirror.com',
|
|
||||||
"wikidata_id": None,
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'HTML',
|
|
||||||
}
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['files', 'apps']
|
|
||||||
paging = True
|
|
||||||
time_range_support = False
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
base_url = 'https://www.apkmirror.com'
|
|
||||||
search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the APKMirror query URL for the requested page."""
    query_args = urlencode({'s': query})
    params['url'] = search_url.format(pageno=params['pageno'], query=query_args)
    logger.debug("query_url --> %s", params['url'])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Extract app rows from the APKMirror result page."""
    results = []
    dom = html.fromstring(resp.text)

    # each app is rendered as one 'appRow' inside the list widget
    for row in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"):
        anchor = eval_xpath_getindex(row, './/h5/a', 0)
        results.append(
            {
                # jump straight to the download section of the app page
                'url': base_url + anchor.attrib.get('href') + '#downloads',
                'title': extract_text(anchor),
                'img_src': base_url + eval_xpath_getindex(row, './/img/@src', 0),
            }
        )

    return results
|
|
||||||
@@ -1,57 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Apple App Store
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from dateutil.parser import parse
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.apple.com/app-store/',
|
|
||||||
"wikidata_id": 'Q368215',
|
|
||||||
"official_api_documentation": (
|
|
||||||
'https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/'
|
|
||||||
'iTuneSearchAPI/UnderstandingSearchResults.html#//apple_ref/doc/uid/TP40017632-CH8-SW1'
|
|
||||||
),
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
categories = ['files', 'apps']
|
|
||||||
safesearch = True
|
|
||||||
|
|
||||||
search_url = 'https://itunes.apple.com/search?{query}'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the iTunes Search API request."""
    # any safe-search level above 'off' hides explicit apps
    explicit = "No" if params['safesearch'] > 0 else "Yes"

    query_args = urlencode({'term': query, 'media': 'software', 'explicit': explicit})
    params['url'] = search_url.format(query=query_args)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Map the iTunes Search API JSON onto SearXNG results."""
    json_result = loads(resp.text)

    return [
        {
            'url': entry['trackViewUrl'],
            'title': entry['trackName'],
            'content': entry['description'],
            'img_src': entry['artworkUrl100'],
            'publishedDate': parse(entry['currentVersionReleaseDate']),
            'author': entry['sellerName'],
        }
        for entry in json_result['results']
    ]
|
|
||||||
@@ -1,113 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Apple Maps"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from time import time
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
from searx.network import get as http_get
|
|
||||||
from searx.engines.openstreetmap import get_key_label
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.apple.com/maps/',
|
|
||||||
"wikidata_id": 'Q276101',
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
token = {'value': '', 'last_updated': None}
|
|
||||||
|
|
||||||
categories = ['map']
|
|
||||||
paging = False
|
|
||||||
|
|
||||||
search_url = "https://api.apple-mapkit.com/v1/search?{query}&mkjsVersion=5.72.53"
|
|
||||||
|
|
||||||
|
|
||||||
def obtain_token():
    """Refresh the mapkit access token (borrowed from duckduckgo).

    The token is cached in the module-level ``token`` dict together with the
    time of the last refresh (rounded down to the half hour).  On any network
    or parsing error the previous token is kept.
    """
    update_time = time() - (time() % 1800)
    try:
        # use duckduckgo's mapkit token
        token_response = http_get('https://duckduckgo.com/local.js?get_mk_token=1', timeout=2.0)
        actual_token = http_get(
            'https://cdn.apple-mapkit.com/ma/bootstrap?apiVersion=2&mkjsVersion=5.72.53&poi=1',
            timeout=2.0,
            headers={'Authorization': 'Bearer ' + token_response.text},
        )
        token['value'] = loads(actual_token.text)['authInfo']['access_token']
        token['last_updated'] = update_time
    # pylint: disable=broad-except
    except Exception:
        # best effort: keep the previous token.  The bare ``except`` used
        # before also silenced KeyboardInterrupt and SystemExit, which must
        # propagate.
        pass
    return token
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the mapkit search request; the bearer token is refreshed every
    30 minutes."""
    last_updated = token['last_updated'] or 0
    if time() - last_updated > 1800:
        obtain_token()

    query_args = urlencode({'q': query, 'lang': params['language']})
    params['url'] = search_url.format(query=query_args)
    params['headers'] = {'Authorization': 'Bearer ' + token['value']}

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Map the mapkit JSON results onto SearXNG map results."""
    results = []

    resp_json = loads(resp.text)

    # labels for the link list are localized via openstreetmap's key labels
    user_language = resp.search_params['language']

    for result in resp_json['results']:
        # bounding box of the displayed map region, when the API provides one
        boundingbox = None
        if 'displayMapRegion' in result:
            box = result['displayMapRegion']
            boundingbox = [box['southLat'], box['northLat'], box['westLng'], box['eastLng']]

        # optional contact links: phone number and first website URL
        links = []
        if 'telephone' in result:
            telephone = result['telephone']
            links.append(
                {
                    'label': get_key_label('phone', user_language),
                    'url': 'tel:' + telephone,
                    'url_label': telephone,
                }
            )
        if result.get('urls'):
            url = result['urls'][0]
            links.append(
                {
                    'label': get_key_label('website', user_language),
                    'url': url,
                    'url_label': url,
                }
            )

        results.append(
            {
                'template': 'map.html',
                'type': result.get('poiCategory'),
                'title': result['name'],
                'links': links,
                'latitude': result['center']['lat'],
                'longitude': result['center']['lng'],
                'url': result['placecardUrl'],
                'boundingbox': boundingbox,
                # GeoJSON uses (lng, lat) coordinate order
                'geojson': {'type': 'Point', 'coordinates': [result['center']['lng'], result['center']['lat']]},
                'address': {
                    'name': result['name'],
                    'house_number': result.get('subThoroughfare'),
                    'road': result.get('thoroughfare'),
                    'locality': result.get('locality'),
                    'postcode': result.get('postCode'),
                    'country': result.get('country'),
                },
            }
        )

    return results
|
|
||||||
@@ -1,152 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Arch Linux Wiki
|
|
||||||
~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
This implementation does not use a official API: Mediawiki provides API, but
|
|
||||||
Arch Wiki blocks access to it.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
from urllib.parse import urlencode, urljoin, urlparse
|
|
||||||
import lxml
|
|
||||||
import babel
|
|
||||||
|
|
||||||
from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
from searx.locales import language_tag
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://wiki.archlinux.org/',
|
|
||||||
"wikidata_id": 'Q101445877',
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'HTML',
|
|
||||||
}
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['it', 'software wikis']
|
|
||||||
paging = True
|
|
||||||
main_wiki = 'wiki.archlinux.org'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build a Mediawiki search request against the wiki that matches the UI
    language (see ``wiki_netloc`` in the engine traits)."""
    sxng_lang = params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    title: str = traits.custom['title'].get(sxng_lang, 'Special:Search')  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'

    if netloc == main_wiki:
        # the main wiki hosts several languages; select one via the query term
        eng_lang: str = traits.get_language(sxng_lang, 'English')  # type: ignore
        query = query + ' (' + eng_lang + ')'
    elif netloc == 'wiki.archlinuxcn.org':
        # the Chinese wiki serves its index under /wzh/
        base_url = 'https://' + netloc + '/wzh/index.php?'

    params['url'] = base_url + urlencode(
        {
            'search': query,
            'title': title,
            'limit': 20,
            'offset': (params['pageno'] - 1) * 20,
            'profile': 'default',
        }
    )
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Extract results from a Mediawiki search result page."""
    dom = lxml.html.fromstring(resp.text)  # type: ignore

    # get the base URL for the language in which request was made
    sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'

    results = []
    for entry in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
        link = eval_xpath_getindex(entry, './/div[@class="mw-search-result-heading"]/a', 0)
        results.append(
            {
                'url': urljoin(base_url, link.get('href')),  # type: ignore
                'title': extract_text(link),
                'content': extract_text(entry.xpath('.//div[@class="searchresult"]')),
            }
        )

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from Archlinix-Wiki. The location of the Wiki address of a
    language is mapped in a :py:obj:`custom field
    <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``). Depending
    on the location, the ``title`` argument in the request is translated.

    .. code:: python

        "custom": {
          "wiki_netloc": {
            "de": "wiki.archlinux.de",
            # ...
            "zh": "wiki.archlinuxcn.org"
          }
          "title": {
            "de": "Spezial:Suche",
            # ...
            "zh": "Special:\u641c\u7d22"
          },
        },

    """
    # pylint: disable=import-outside-toplevel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    engine_traits.custom['wiki_netloc'] = {}
    engine_traits.custom['title'] = {}

    # translations of the Mediawiki 'Special:Search' page title for the
    # external wikis (the main wiki uses the English title)
    title_map = {
        'de': 'Spezial:Suche',
        'fa': 'ویژه:جستجو',
        'ja': '特別:検索',
        'zh': 'Special:搜索',
    }

    resp = get('https://wiki.archlinux.org/')
    if not resp.ok:  # type: ignore
        # NOTE(review): execution continues and parses resp.text anyway —
        # looks like this should abort or raise; confirm intended behavior
        print("ERROR: response from wiki.archlinix.org is not OK.")

    dom = lxml.html.fromstring(resp.text)  # type: ignore
    # one interlanguage link per language variant of the wiki
    for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):

        sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
        # zh_Hans --> zh
        sxng_tag = sxng_tag.split('_')[0]

        netloc = urlparse(a.get('href')).netloc
        if netloc != 'wiki.archlinux.org':
            # external wiki: record its host and translated search title
            title = title_map.get(sxng_tag)
            if not title:
                print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
                continue
            engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
            engine_traits.custom['title'][sxng_tag] = title  # type: ignore

        # the visible language name is taken from the link's <span>
        eng_tag = extract_text(eval_xpath_list(a, ".//span"))
        engine_traits.languages[sxng_tag] = eng_tag  # type: ignore

    engine_traits.languages['en'] = 'English'
@@ -1,69 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""The Art Institute of Chicago
|
|
||||||
|
|
||||||
Explore thousands of artworks from The Art Institute of Chicago.
|
|
||||||
|
|
||||||
* https://artic.edu
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.artic.edu',
|
|
||||||
"wikidata_id": 'Q239303',
|
|
||||||
"official_api_documentation": 'http://api.artic.edu/docs/',
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
nb_per_page = 20
|
|
||||||
|
|
||||||
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
|
|
||||||
image_api = 'https://www.artic.edu/iiif/2/'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the Art Institute of Chicago API request."""
    query_args = urlencode(
        {
            'q': query,
            'page': params['pageno'],
            # only request the fields the response() mapping actually uses
            'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
            'limit': nb_per_page,
        }
    )
    params['url'] = search_api + query_args

    logger.debug("query_url --> %s", params['url'])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Map the artworks returned by the API onto SearXNG image results."""
    json_data = loads(resp.text)
    results = []

    for artwork in json_data['data']:

        # artworks without a digitized image cannot be shown
        if not artwork['image_id']:
            continue

        results.append(
            {
                'url': 'https://artic.edu/artworks/%(id)s' % artwork,
                'title': artwork['title'] + " (%(date_display)s) // %(artist_display)s" % artwork,
                'content': artwork['medium_display'],
                'author': ', '.join(artwork['artist_titles']),
                'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % artwork,
                'img_format': artwork['dimensions'],
                'template': 'images.html',
            }
        )

    return results
|
|
||||||
@@ -1,109 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
ArXiV (Scientific preprints)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from lxml.etree import XPath
|
|
||||||
from datetime import datetime
|
|
||||||
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://arxiv.org',
    "wikidata_id": 'Q118398',
    "official_api_documentation": 'https://arxiv.org/help/api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML-RSS',
}

categories = ['science', 'scientific publications']
paging = True

# Atom query endpoint; {query}, {offset} and {number_of_results} are filled
# in by request()
base_url = (
    'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}'
)

# engine dependent config
number_of_results = 10

# xpaths
# XML namespaces used by the arXiv Atom feed
arxiv_namespaces = {
    "atom": "http://www.w3.org/2005/Atom",
    "arxiv": "http://arxiv.org/schemas/atom",
}
# pre-compiled XPath expressions, evaluated once per feed entry in response()
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the arXiv API request URL.

    The raw query is percent-encoded before being interpolated into
    ``base_url``: the old code inserted it verbatim, so spaces or other
    special characters produced an invalid request URL.
    """
    from urllib.parse import quote  # local import: urllib is not imported at module level

    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    string_args = dict(query=quote(query), offset=offset, number_of_results=number_of_results)

    params['url'] = base_url.format(**string_args)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the arXiv Atom feed into a list of 'paper.html' results."""
    results = []
    dom = etree.fromstring(resp.content)
    for entry in eval_xpath_list(dom, xpath_entry):
        title = eval_xpath_getindex(entry, xpath_title, 0).text

        # the atom:id element text is used directly as the result URL
        url = eval_xpath_getindex(entry, xpath_id, 0).text
        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text

        authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]

        # doi (optional element -- None when absent)
        doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
        doi = None if doi_element is None else doi_element.text

        # pdf (optional link, href attribute)
        pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
        pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')

        # journal (optional journal reference)
        journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
        journal = None if journal_element is None else journal_element.text

        # tags: the @term attributes of the atom:category elements
        tag_elements = eval_xpath(entry, xpath_category)
        tags = [str(tag) for tag in tag_elements]

        # comments (optional)
        comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
        comments = None if comments_elements is None else comments_elements.text

        # atom:published is mandatory in the feed; a missing/invalid value
        # would raise here and be handled by the engine framework
        publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')

        res_dict = {
            'template': 'paper.html',
            'url': url,
            'title': title,
            'publishedDate': publishedDate,
            'content': abstract,
            'doi': doi,
            'authors': authors,
            'journal': journal,
            'tags': tags,
            'comments': comments,
            'pdf_url': pdf_url,
        }

        results.append(res_dict)

    return results
|
|
||||||
@@ -1,95 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Bandcamp (Music)
|
|
||||||
|
|
||||||
@website https://bandcamp.com/
|
|
||||||
@provide-api no
|
|
||||||
@results HTML
|
|
||||||
@parse url, title, content, publishedDate, iframe_src, thumbnail
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urlencode, urlparse, parse_qs
|
|
||||||
from dateutil.parser import parse as dateparse
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.utils import (
|
|
||||||
eval_xpath_getindex,
|
|
||||||
eval_xpath_list,
|
|
||||||
extract_text,
|
|
||||||
)
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://bandcamp.com/',
    "wikidata_id": 'Q545966',
    "official_api_documentation": 'https://bandcamp.com/developer',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['music']
paging = True

base_url = "https://bandcamp.com/"
search_string = 'search?{query}&page={page}'
# template for the embedded-player URL; {type} is 'album' or 'track'
iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small"
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    '''pre-request callback

    Assembles the Bandcamp search URL from the module level ``base_url``
    and ``search_string`` templates and stores it in ``params['url']``.

    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {} # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1 # number of the requested page
    '''
    encoded_query = urlencode({'q': query})
    params['url'] = base_url + search_string.format(query=encoded_query, page=params['pageno'])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    '''post-response callback

    Parses the Bandcamp search result page into result dicts.

    resp: requests response object
    '''
    results = []
    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'):

        link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None)
        if link is None:
            continue

        title = result.xpath('.//div[@class="heading"]/a/text()')
        content = result.xpath('.//div[@class="subhead"]/text()')
        new_result = {
            "url": extract_text(link),
            "title": extract_text(title),
            "content": extract_text(content),
        }

        # BUGFIX: the XPath must be relative ('.//') -- the old absolute
        # '//div[@class="released"]' searched the whole document, so every
        # result got the first release date found on the page.
        date = eval_xpath_getindex(result, './/div[@class="released"]/text()', 0, default=None)
        if date:
            new_result["publishedDate"] = dateparse(date.replace("released ", ""))

        thumbnail = result.xpath('.//div[@class="art"]/img/@src')
        if thumbnail:
            new_result['img_src'] = thumbnail[0]

        # the embedded player needs the numeric item id from the link query
        result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
        itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()
        if "album" == itemtype:
            new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id)
        elif "track" == itemtype:
            new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id)

        results.append(new_result)
    return results
|
|
||||||
@@ -1,112 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
BASE (Scholar publications)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from lxml import etree
|
|
||||||
from datetime import datetime
|
|
||||||
import re
|
|
||||||
from searx.utils import searx_useragent
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://base-search.net',
    "wikidata_id": 'Q448335',
    "official_api_documentation": 'https://api.base-search.net/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML',
}

categories = ['science']

# API endpoint; {query}, {hits} and {offset} are filled in by request()
base_url = (
    'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'
    + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
)

# engine dependent config
paging = True
number_of_results = 10

# shortcuts for advanced search
# maps user-friendly filter prefixes to the Dublin-Core field names of the
# BASE API (name 'shorcut' [sic] kept -- other code in this module uses it)
shorcut_dict = {
    # user-friendly keywords
    'format:': 'dcformat:',
    'author:': 'dccreator:',
    'collection:': 'dccollection:',
    'hdate:': 'dchdate:',
    'contributor:': 'dccontributor:',
    'coverage:': 'dccoverage:',
    'date:': 'dcdate:',
    'abstract:': 'dcdescription:',
    'urls:': 'dcidentifier:',
    'language:': 'dclanguage:',
    'publisher:': 'dcpublisher:',
    'relation:': 'dcrelation:',
    'rights:': 'dcrights:',
    'source:': 'dcsource:',
    'subject:': 'dcsubject:',
    'title:': 'dctitle:',
    'type:': 'dcdctype:',
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the BASE search request.

    User-friendly filter prefixes (e.g. ``author:``) are translated to the
    Dublin-Core field names of the BASE API (e.g. ``dccreator:``) in a
    single pass before the query is encoded.
    """
    # BUGFIX: replace all shortcuts in ONE pass.  The old loop ran one
    # re.sub() per shortcut, so a later pattern could rewrite an earlier
    # replacement (e.g. 'hdate:' -> 'dchdate:' was then mangled into
    # 'dchdcdate:' by the subsequent 'date:' pass).
    shortcut_re = re.compile('|'.join(re.escape(key) for key in shorcut_dict))
    query = shortcut_re.sub(lambda match: shorcut_dict[match.group(0)], query)

    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results)

    params['url'] = base_url.format(**string_args)

    # identify SearXNG to the BASE API
    params['headers']['User-Agent'] = searx_useragent()
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the BASE XML response into result dicts."""
    results = []

    search_results = etree.XML(resp.content)

    for entry in search_results.xpath('./result/doc'):
        content = "No description available"

        # fallback value in case no dcdate field is present for an item;
        # strptime() below then raises TypeError and publishedDate stays None
        date = datetime.now()
        for item in entry:
            if item.attrib["name"] == "dcdate":
                date = item.text

            elif item.attrib["name"] == "dctitle":
                title = item.text

            elif item.attrib["name"] == "dclink":
                url = item.text

            elif item.attrib["name"] == "dcdescription":
                content = item.text[:300]
                if len(item.text) > 300:
                    content += "..."

        # dates returned by the BASE API come in several formats
        publishedDate = None
        for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']:
            try:
                publishedDate = datetime.strptime(date, date_format)
                break
            except (ValueError, TypeError):
                # BUGFIX: was a bare 'except:' which also swallowed
                # SystemExit/KeyboardInterrupt.  ValueError: the string does
                # not match this format; TypeError: no dcdate was found and
                # `date` is still a datetime object.
                pass

        if publishedDate is not None:
            res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
        else:
            res_dict = {'url': url, 'title': title, 'content': content}

        results.append(res_dict)

    return results
|
|
||||||
@@ -1,337 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""This is the implementation of the Bing-WEB engine. Some of this
|
|
||||||
implementations are shared by other engines:
|
|
||||||
|
|
||||||
- :ref:`bing images engine`
|
|
||||||
- :ref:`bing news engine`
|
|
||||||
- :ref:`bing videos engine`
|
|
||||||
|
|
||||||
On the `preference page`_ Bing offers a lot of languages and regions (see section
|
|
||||||
'Search results languages' and 'Country/region'). However, the abundant choice
|
|
||||||
does not correspond to reality, where Bing has a full-text indexer only for a
|
|
||||||
limited number of languages. By example: you can select a language like Māori
|
|
||||||
but you never get a result in this language.
|
|
||||||
|
|
||||||
What comes a bit closer to the truth are the `search-APIs`_ but they don't seem
|
|
||||||
to be completely correct either (if you take a closer look you will find some
|
|
||||||
inaccuracies there too):
|
|
||||||
|
|
||||||
- :py:obj:`searx.engines.bing.bing_traits_url`
|
|
||||||
- :py:obj:`searx.engines.bing_videos.bing_traits_url`
|
|
||||||
- :py:obj:`searx.engines.bing_images.bing_traits_url`
|
|
||||||
- :py:obj:`searx.engines.bing_news.bing_traits_url`
|
|
||||||
|
|
||||||
.. _preference page: https://www.bing.com/account/general
|
|
||||||
.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=too-many-branches, invalid-name
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
import datetime
|
|
||||||
import re
|
|
||||||
import uuid
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from lxml import html
|
|
||||||
import babel
|
|
||||||
import babel.languages
|
|
||||||
|
|
||||||
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
|
|
||||||
from searx.locales import language_tag, region_tag
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
    import logging

    logger: logging.Logger

# NOTE(review): presumably injected by the engine loader at runtime -- verify
# against searx.enginelib
traits: EngineTraits

about = {
    "website": 'https://www.bing.com',
    "wikidata_id": 'Q182496',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

send_accept_language_header = True
"""Bing tries to guess user's language and territory from the HTTP
Accept-Language. Optional the user can select a search-language (can be
different to the UI language) and a region (market code)."""

# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT

base_url = 'https://www.bing.com/search'
"""Bing (Web) search URL"""

bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
"""Bing (Web) search API description"""
|
|
||||||
|
|
||||||
|
|
||||||
def _get_offset_from_pageno(pageno):
|
|
||||||
return (pageno - 1) * 10 + 1
|
|
||||||
|
|
||||||
|
|
||||||
def set_bing_cookies(params, engine_language, engine_region, SID):
    """Fill ``params['cookies']`` with the _EDGE_V, _EDGE_S, _EDGE_CD and
    SRCHHPGUSR cookies that select Bing's market (region), UI language and
    SafeSearch level.  Shared by the Bing web/images/news engines.
    """

    # set cookies
    # -----------

    params['cookies']['_EDGE_V'] = '1'

    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
    _EDGE_S = [
        'F=1',
        'SID=%s' % SID,
        'mkt=%s' % engine_region.lower(),
        'ui=%s' % engine_language.lower(),
    ]
    params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
    logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])

    # "_EDGE_CD": "m=zh-tw",

    _EDGE_CD = [  # pylint: disable=invalid-name
        'm=%s' % engine_region.lower(),  # search region: zh-cn
        'u=%s' % engine_language.lower(),  # UI: en-us
    ]

    # NOTE(review): the trailing ';' is appended only for this cookie --
    # presumably intentional, verify against a captured Bing session
    params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
    logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])

    SRCHHPGUSR = [  # pylint: disable=invalid-name
        'SRCHLANG=%s' % engine_language,
        # Trying to set ADLT cookie here seems not to have any effect, I assume
        # there is some age verification by a cookie (and/or session ID) needed,
        # to disable the SafeSearch.
        'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
    ]
    params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
    logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble a Bing-Web request."""

    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')

    # fresh session (SID) and conversation (CVID) identifiers per request
    SID = uuid.uuid1().hex.upper()
    CVID = uuid.uuid1().hex.upper()

    set_bing_cookies(params, engine_language, engine_region, SID)

    # build URL query
    # ---------------

    # query term
    page = int(params.get('pageno', 1))
    query_params = {
        # fmt: off
        'q': query,
        'pq': query,
        'cvid': CVID,
        'qs': 'n',
        'sp': '-1'
        # fmt: on
    }

    # page
    if page > 1:
        # for page 2+ send a Referer that looks like the previous results page
        referer = base_url + '?' + urlencode(query_params)
        params['headers']['Referer'] = referer
        logger.debug("headers.Referer --> %s", referer)

    query_params['first'] = _get_offset_from_pageno(page)

    # FORM=PERE / PERE<n>: form codes the Bing UI sends when paging
    if page == 2:
        query_params['FORM'] = 'PERE'
    elif page > 2:
        query_params['FORM'] = 'PERE%s' % (page - 2)

    filters = ''
    if params['time_range']:
        query_params['filt'] = 'custom'

        if params['time_range'] == 'day':
            filters = 'ex1:"ez1"'
        elif params['time_range'] == 'week':
            filters = 'ex1:"ez2"'
        elif params['time_range'] == 'month':
            filters = 'ex1:"ez3"'
        elif params['time_range'] == 'year':
            # ez5_<from>_<to> takes a day range counted from the Unix epoch
            epoch_1970 = datetime.date(1970, 1, 1)
            today_no = (datetime.date.today() - epoch_1970).days
            filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)

    params['url'] = base_url + '?' + urlencode(query_params)
    if filters:
        # the filters value is appended without urlencoding
        params['url'] = params['url'] + '&filters=' + filters
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the Bing-Web result page; resolves Bing redirect URLs and
    appends a 'number_of_results' entry."""
    # pylint: disable=too-many-locals,import-outside-toplevel

    from searx.network import Request, multi_requests  # see https://github.com/searxng/searxng/issues/762

    results = []
    result_len = 0

    dom = html.fromstring(resp.text)

    # parse results again if nothing is found yet
    # NOTE(review): comment looks stale -- there is only one parsing pass below

    # URLs hidden behind Bing's click-tracking redirect that need an extra
    # HTTP request to resolve, and the index of their result dict in `results`
    url_to_resolve = []
    url_to_resolve_index = []
    i = 0
    for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):

        link = eval_xpath_getindex(result, './/h2/a', 0, None)
        if link is None:
            continue
        url = link.attrib.get('href')
        title = extract_text(link)

        content = eval_xpath(result, '(.//p)[1]')
        for p in content:
            # Make sure that the element is free of <a href> links
            for e in p.xpath('.//a'):
                e.getparent().remove(e)
        content = extract_text(content)

        # get the real URL either using the URL shown to user or following the Bing URL
        if url.startswith('https://www.bing.com/ck/a?'):
            url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
            # Bing can shorten the URL either at the end or in the middle of the string
            if (
                url_cite
                and url_cite.startswith('https://')
                and '…' not in url_cite
                and '...' not in url_cite
                and '›' not in url_cite
            ):
                # no need for an additional HTTP request
                url = url_cite
            else:
                # resolve the URL with an additional HTTP request
                url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
                url_to_resolve_index.append(i)
                url = None  # remove the result if the HTTP Bing redirect raise an exception

        # append result
        results.append({'url': url, 'title': title, 'content': content})
        # increment result pointer for the next iteration in this loop
        i += 1

    # resolve all Bing redirections in parallel
    request_list = [
        Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
    ]
    response_list = multi_requests(request_list)
    for i, redirect_response in enumerate(response_list):
        if not isinstance(redirect_response, Exception):
            results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']

    # get number_of_results
    try:
        result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
        if "-" in result_len_container:

            # Remove the part "from-to" for paginated request ...
            # NOTE(review): the 'find("-") * 2 + 2' slice assumes the
            # "<from>-<to>" prefix is symmetric around the dash -- confirm
            # against a live sb_count string
            result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]

        result_len_container = re.sub('[^0-9]', '', result_len_container)

        if len(result_len_container) > 0:
            result_len = int(result_len_container)

    except Exception as e:  # pylint: disable=broad-except
        logger.debug('result error :\n%s', e)

    # drop everything when the requested page lies beyond the result count
    if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
        return []

    results.append({'number_of_results': result_len})
    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Web."""

    # on the documentation page the market codes sit in column 3 of the
    # first table and the language codes in column 2 of the third table
    market_codes_xpath = '//table[1]/tbody/tr/td[3]'
    language_codes_xpath = '//table[3]/tbody/tr/td[2]'

    _fetch_traits(engine_traits, bing_traits_url, language_codes_xpath, market_codes_xpath)
|
|
||||||
|
|
||||||
|
|
||||||
def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
    """Scrape the language- and market-code tables from the Bing API
    documentation page at *url* and register them in *engine_traits*.

    Shared by the Bing web/images/news engines, which differ only in the
    documentation URL and table positions.
    """
    # pylint: disable=too-many-locals,import-outside-toplevel

    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    # insert alias to map from a language (zh) to a language + script (zh_Hans)
    engine_traits.languages['zh'] = 'zh-hans'

    resp = get(url)

    if not resp.ok:  # type: ignore
        # BUGFIX: the old message mentioned "peertube" -- a copy/paste
        # left-over from another engine; report the failing URL instead.
        print("ERROR: response from %s is not OK." % url)

    dom = html.fromstring(resp.text)  # type: ignore

    map_lang = {'jp': 'ja'}
    for td in eval_xpath(dom, xpath_language_codes):
        eng_lang = td.text

        if eng_lang in ('en-gb', 'pt-br'):
            # language 'en' is already in the list and a language 'en-gb' can't
            # be handled in SearXNG, same with pt-br which is covered by pt-pt.
            continue

        babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
        try:
            sxng_tag = language_tag(babel.Locale.parse(babel_lang))
        except babel.UnknownLocaleError:
            print("ERROR: language (%s) is unknown by babel" % (eng_lang))
            continue
        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang

    map_region = {
        'en-ID': 'id_ID',
        'no-NO': 'nb_NO',
    }

    for td in eval_xpath(dom, xpath_market_codes):
        eng_region = td.text
        babel_region = map_region.get(eng_region, eng_region).replace('-', '_')

        # the *Worldwide aggregate* market has no babel equivalent
        if eng_region == 'en-WW':
            engine_traits.all_locale = eng_region
            continue

        try:
            sxng_tag = region_tag(babel.Locale.parse(babel_region))
        except babel.UnknownLocaleError:
            print("ERROR: region (%s) is unknown by babel" % (eng_region))
            continue
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_region:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
            continue
        engine_traits.regions[sxng_tag] = eng_region
|
|
||||||
@@ -1,132 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Bing-Images: description see :py:obj:`searx.engines.bing`.
|
|
||||||
"""
|
|
||||||
# pylint: disable=invalid-name
|
|
||||||
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
import uuid
|
|
||||||
import json
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
from searx.engines.bing import (
|
|
||||||
set_bing_cookies,
|
|
||||||
_fetch_traits,
|
|
||||||
)
|
|
||||||
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://www.bing.com/images',
    "wikidata_id": 'Q182496',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['images', 'web']
paging = True
safesearch = True
time_range_support = True

base_url = 'https://www.bing.com/images/async'
"""Bing (Images) search URL"""

bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'
"""Bing (Images) search API description"""

# time-range filter values in minutes ('qft=filterui:age-lt<minutes>')
time_map = {
    # fmt: off
    'day': 60 * 24,
    'week': 60 * 24 * 7,
    'month': 60 * 24 * 31,
    'year': 60 * 24 * 365,
    # fmt: on
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble a Bing-Image request."""

    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
    engine_language = traits.get_language(params['searxng_locale'], 'en')

    SID = uuid.uuid1().hex.upper()
    set_bing_cookies(params, engine_language, engine_region, SID)

    # build URL query
    # - example: https://www.bing.com/images/async?q=foo&first=155&count=35
    # to simplify the page count lets use the default of 35 images per page
    page = int(params.get('pageno', 1))
    query_params = {
        'q': query,
        'async': 'content',
        'first': (page - 1) * 35 + 1,
        'count': 35,
    }

    # time range
    # - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
    if params['time_range']:
        query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']]

    params['url'] = base_url + '?' + urlencode(query_params)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Get response from Bing-Images"""

    results = []
    dom = html.fromstring(resp.text)

    for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):

        metadata = result.xpath('.//a[@class="iusc"]/@m')
        if not metadata:
            continue

        # reuse the XPath result obtained above instead of evaluating the
        # same expression against the DOM a second time (old code re-queried)
        metadata = json.loads(metadata[0])
        title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
        img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip()
        source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
        results.append(
            {
                'template': 'images.html',
                'url': metadata['purl'],
                'thumbnail_src': metadata['turl'],
                'img_src': metadata['murl'],
                'content': metadata['desc'],
                'title': title,
                'source': source,
                'img_format': img_format,
            }
        )
    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Images (the tables are scraped
    from the page at ``bing_traits_url``; the old docstring said
    "Bing-News" -- a copy/paste left-over)."""

    xpath_market_codes = '//table[1]/tbody/tr/td[3]'
    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
    xpath_language_codes = '//table[3]/tbody/tr/td[2]'

    _fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
|
|
||||||
@@ -1,150 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Bing-News: description see :py:obj:`searx.engines.bing`.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# pylint: disable=invalid-name
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
import uuid
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
from searx.engines.bing import (
|
|
||||||
set_bing_cookies,
|
|
||||||
_fetch_traits,
|
|
||||||
)
|
|
||||||
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://www.bing.com/news',
    "wikidata_id": 'Q2878637',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'RSS',
}

# engine dependent config
categories = ['news']
paging = True
time_range_support = True
time_map = {
    'day': '4',
    'week': '8',
    'month': '9',
}
"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the
difference of *last day* and *last week* in the result list is just marginally.
"""

base_url = 'https://www.bing.com/news/infinitescrollajax'
"""Bing (News) search URL"""

bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'
"""Bing (News) search API description"""

mkt_alias = {
    'zh': 'en-WW',
    'zh-CN': 'en-WW',
}
"""Bing News has an official market code 'zh-CN' but we won't get a result with
this market code. For 'zh' and 'zh-CN' we better use the *Worldwide aggregate*
market code (en-WW).
"""
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble a Bing-News request.

    Builds the ``infinitescrollajax`` URL with paging (10 results per page)
    and an optional time-range filter, and sets the Bing cookies for the
    selected engine region/language.
    """

    sxng_locale = params['searxng_locale']
    engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
    engine_language = traits.get_language(sxng_locale, 'en')

    SID = uuid.uuid1().hex.upper()
    set_bing_cookies(params, engine_language, engine_region, SID)

    # build URL query
    #
    # example: https://www.bing.com/news/infinitescrollajax?q=london&first=1

    query_params = {
        # fmt: off
        'q': query,
        'InfiniteScroll': 1,
        # to simplify the page count lets use the default of 10 images per page
        'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1,
        # fmt: on
    }

    if params['time_range']:
        # qft=interval:"7" -- the dict value must NOT repeat the 'qft='
        # parameter name; urlencode() would otherwise emit the useless
        # 'qft=qft%3Dinterval...'.
        query_params['qft'] = 'interval="%s"' % time_map.get(params['time_range'], '9')

    params['url'] = base_url + '?' + urlencode(query_params)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Get response from Bing-News.

    Parses the HTML fragments of the *infinite scroll* endpoint into SearXNG
    result dicts. Returns an empty list for error / empty responses.
    """
    results = []

    if not resp.ok or not resp.text:
        return results

    dom = html.fromstring(resp.text)

    for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'):

        # skip malformed items without a target URL instead of raising IndexError
        url = (newsitem.xpath('./@url') or [None])[0]
        if not url:
            continue

        title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip()
        content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip()
        thumbnail = None
        # the @data-author attribute is not guaranteed on every item
        author = (newsitem.xpath('./@data-author') or [None])[0]
        metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip()

        img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src')
        if img_src:
            # image URLs in the fragment are relative to bing.com
            thumbnail = 'https://www.bing.com/' + img_src[0]

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'img_src': thumbnail,
                'author': author,
                'metadata': metadata,
            }
        )

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-News.

    The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the
    first table says *"query parameter when calling the Video Search API."* --
    that's why the 4th table ("News Category API markets") is used for the
    market codes.
    """

    # markets come from the 4th table, languages from the 3rd
    market_codes_xpath = '//table[4]/tbody/tr/td[3]'
    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
    language_codes_xpath = '//table[3]/tbody/tr/td[2]'

    _fetch_traits(engine_traits, bing_traits_url, language_codes_xpath, market_codes_xpath)
|
|
||||||
@@ -1,128 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
|
|
||||||
"""
|
|
||||||
# pylint: disable=invalid-name
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
import uuid
|
|
||||||
import json
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
from searx.engines.bing import (
|
|
||||||
set_bing_cookies,
|
|
||||||
_fetch_traits,
|
|
||||||
)
|
|
||||||
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://www.bing.com/videos',
    "wikidata_id": 'Q4914152',
    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['videos', 'web']
paging = True
safesearch = True
time_range_support = True

base_url = 'https://www.bing.com/videos/asyncv2'
"""Bing (Videos) async search URL."""

bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'
"""Bing (Video) search API description"""

# map a SearXNG time range to Bing's video-age filter (value in minutes)
time_map = {
    'day': 24 * 60,
    'week': 7 * 24 * 60,
    'month': 31 * 24 * 60,
    'year': 365 * 24 * 60,
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble a Bing-Video request."""

    region = traits.get_region(params['searxng_locale'], 'en-US')
    language = traits.get_language(params['searxng_locale'], 'en')

    sid = uuid.uuid1().hex.upper()
    set_bing_cookies(params, language, region, sid)

    # build URL query
    #
    # example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35

    page = int(params.get('pageno', 1))
    args = {
        'q': query,
        'async': 'content',
        # to simplify the page count lets use the default of 35 images per page
        'first': (page - 1) * 35 + 1,
        'count': 35,
    }

    # time range
    #
    # example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'

    if params['time_range']:
        args['form'] = 'VRFLTR'
        args['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']]

    params['url'] = base_url + '?' + urlencode(args)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Get response from Bing-Video.

    Parses the async HTML fragment into SearXNG video results. Returns an
    empty list for error / empty responses (consistent with the other Bing
    engines).
    """
    results = []

    # guard against empty or error responses before feeding the HTML parser
    if not resp.ok or not resp.text:
        return results

    dom = html.fromstring(resp.text)

    for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'):
        # per-video metadata is embedded as JSON in the @vrhm attribute
        metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
        info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
        content = '{0} - {1}'.format(metadata['du'], info)
        thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]

        results.append(
            {
                'url': metadata['murl'],
                'thumbnail': thumbnail,
                'title': metadata.get('vt', ''),
                'content': content,
                'template': 'videos.html',
            }
        )

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and regions from Bing-Videos."""

    # markets come from the 1st table, languages from the 3rd
    market_codes_xpath = '//table[1]/tbody/tr/td[3]'
    # xpath_country_codes = '//table[2]/tbody/tr/td[2]'
    language_codes_xpath = '//table[3]/tbody/tr/td[2]'

    _fetch_traits(engine_traits, bing_traits_url, language_codes_xpath, market_codes_xpath)
|
|
||||||
@@ -1,419 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Brave supports the categories listed in :py:obj:`brave_category` (General,
|
|
||||||
news, videos, images). The support of :py:obj:`paging` and :py:obj:`time range
|
|
||||||
<time_range_support>` is limited (see remarks).
|
|
||||||
|
|
||||||
Configured ``brave`` engines:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: brave
|
|
||||||
engine: brave
|
|
||||||
...
|
|
||||||
brave_category: search
|
|
||||||
time_range_support: true
|
|
||||||
paging: true
|
|
||||||
|
|
||||||
- name: brave.images
|
|
||||||
engine: brave
|
|
||||||
...
|
|
||||||
brave_category: images
|
|
||||||
|
|
||||||
- name: brave.videos
|
|
||||||
engine: brave
|
|
||||||
...
|
|
||||||
brave_category: videos
|
|
||||||
|
|
||||||
- name: brave.news
|
|
||||||
engine: brave
|
|
||||||
...
|
|
||||||
brave_category: news
|
|
||||||
|
|
||||||
|
|
||||||
.. _brave regions:
|
|
||||||
|
|
||||||
Brave regions
|
|
||||||
=============
|
|
||||||
|
|
||||||
Brave uses two-digit tags for the regions like ``ca`` while SearXNG deals with
|
|
||||||
locales. To get a mapping, all *official de-facto* languages of the Brave
|
|
||||||
region are mapped to regions in SearXNG (see :py:obj:`babel
|
|
||||||
<babel.languages.get_official_languages>`):
|
|
||||||
|
|
||||||
.. code:: python
|
|
||||||
|
|
||||||
"regions": {
|
|
||||||
..
|
|
||||||
"en-CA": "ca",
|
|
||||||
"fr-CA": "ca",
|
|
||||||
..
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
The language (aka region) support of Brave's index is limited to very basic
|
|
||||||
languages. The search results for languages like Chinese or Arabic are of
|
|
||||||
low quality.
|
|
||||||
|
|
||||||
|
|
||||||
.. _brave languages:
|
|
||||||
|
|
||||||
Brave languages
|
|
||||||
===============
|
|
||||||
|
|
||||||
Brave's language support is limited to the UI (menus, area local notations,
etc). Brave's index only seems to support a locale, but it does not seem to
support any languages in its index. The choice of available languages is very
small (and it's not clear to me where the difference in UI is when switching
from en-us to en-ca or en-gb).
|
|
||||||
|
|
||||||
In the :py:obj:`EngineTraits object <searx.enginelib.traits.EngineTraits>` the
|
|
||||||
UI languages are stored in a custom field named ``ui_lang``:
|
|
||||||
|
|
||||||
.. code:: python
|
|
||||||
|
|
||||||
"custom": {
|
|
||||||
"ui_lang": {
|
|
||||||
"ca": "ca",
|
|
||||||
"de-DE": "de-de",
|
|
||||||
"en-CA": "en-ca",
|
|
||||||
"en-GB": "en-gb",
|
|
||||||
"en-US": "en-us",
|
|
||||||
"es": "es",
|
|
||||||
"fr-CA": "fr-ca",
|
|
||||||
"fr-FR": "fr-fr",
|
|
||||||
"ja-JP": "ja-jp",
|
|
||||||
"pt-BR": "pt-br",
|
|
||||||
"sq-AL": "sq-al"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
Implementations
|
|
||||||
===============
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
import re
|
|
||||||
from urllib.parse import (
|
|
||||||
urlencode,
|
|
||||||
urlparse,
|
|
||||||
parse_qs,
|
|
||||||
)
|
|
||||||
|
|
||||||
import chompjs
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx import locales
|
|
||||||
from searx.utils import (
|
|
||||||
extract_text,
|
|
||||||
eval_xpath_list,
|
|
||||||
eval_xpath_getindex,
|
|
||||||
)
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://search.brave.com/',
    "wikidata_id": 'Q22906900',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

base_url = "https://search.brave.com/"
categories = []
brave_category = 'search'
"""Brave supports common web-search, video search, image and video search.

- ``search``: Common WEB search
- ``videos``: search for videos
- ``images``: search for images
- ``news``: search for news
"""

brave_spellcheck = False
"""Brave supports some kind of spell checking. When activated, Brave tries to
fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In
the UI of Brave the user gets warned about this, since we can not warn the user
in SearXNG, the spellchecking is disabled by default.
"""

send_accept_language_header = True
paging = False
"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI
category All)."""

safesearch = True
safesearch_map = {2: 'strict', 1: 'moderate', 0: 'off'}  # cookie: safesearch=off

time_range_support = False
"""Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI
category All)."""

# SearXNG time range --> Brave 'tf' URL argument
time_range_map = {
    'day': 'pd',
    'week': 'pw',
    'month': 'pm',
    'year': 'py',
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble a Brave request; the category selects the URL path."""

    # Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787
    params['headers']['Accept-Encoding'] = 'gzip, deflate'

    args = {'q': query}
    if brave_spellcheck:
        args['spellcheck'] = '1'

    # paging and time range are only supported by the 'search' category
    if brave_category == 'search':
        offset = params.get('pageno', 1) - 1
        if offset:
            args['offset'] = offset
        tf = time_range_map.get(params['time_range'])
        if tf:
            args['tf'] = tf

    params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"

    # set properties in the cookies

    params['cookies']['safesearch'] = safesearch_map.get(params['safesearch'], 'off')
    # the useLocation is IP based, we use cookie 'country' for the region
    params['cookies']['useLocation'] = '0'
    params['cookies']['summarizer'] = '0'

    engine_region = traits.get_region(params['searxng_locale'], 'all')
    params['cookies']['country'] = engine_region.split('-')[-1].lower()  # type: ignore

    ui_lang = locales.get_engine_locale(params['searxng_locale'], traits.custom["ui_lang"], 'en-us')
    params['cookies']['ui_lang'] = ui_lang

    logger.debug("cookies %s", params['cookies'])
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Dispatch the response to the parser of the configured category."""

    if brave_category == 'search':
        return _parse_search(resp)

    # the non-search categories embed their payload as a JS object
    # in a single 'const data = ...' script line
    datastr = ""
    for line in resp.text.split("\n"):
        if "const data = " in line:
            datastr = line.replace("const data = ", "").strip()[:-1]
            break

    json_data = chompjs.parse_js_object(datastr)
    json_resp = json_data[1]['data']['body']['response']

    if brave_category == 'news':
        return _parse_news(json_resp['news'])
    if brave_category == 'images':
        return _parse_images(json_resp)
    if brave_category == 'videos':
        return _parse_videos(json_resp)

    raise ValueError(f"Unsupported brave category: {brave_category}")
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_search(resp):
    """Parse the HTML page of Brave's *All* (``search``) category."""

    results = []
    dom = html.fromstring(resp.text)

    # instant-answer box, if present
    answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None)
    if answer_tag:
        results.append({'answer': extract_text(answer_tag)})

    # xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
    xpath_results = '//div[contains(@class, "snippet")]'

    for snippet in eval_xpath_list(dom, xpath_results):

        url = eval_xpath_getindex(snippet, './/a[@class="result-header"]/@href', 0, default=None)
        title_tag = eval_xpath_getindex(snippet, './/span[@class="snippet-title"]', 0, default=None)
        # NOTE: truthiness test on the lxml element (empty elements are
        # falsy) -- kept as in the original implementation
        if not (url and title_tag):
            continue

        content_tag = eval_xpath_getindex(snippet, './/p[@class="snippet-description"]', 0, default='')
        img_src = eval_xpath_getindex(snippet, './/img[@class="thumb"]/@src', 0, default='')

        item = {
            'url': url,
            'title': extract_text(title_tag),
            'content': extract_text(content_tag),
            'img_src': img_src,
        }

        video_tag = eval_xpath_getindex(
            snippet, './/div[contains(@class, "video-snippet") and @data-macro="video"]', 0, default=None
        )
        if video_tag is not None:

            # In my tests a video tag in the WEB search was mostoften not a
            # video, except the ones from youtube ..

            iframe_src = _get_iframe_src(url)
            if iframe_src:
                item['iframe_src'] = iframe_src
                item['template'] = 'videos.html'
                item['thumbnail'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
            else:
                item['img_src'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')

        results.append(item)

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def _get_iframe_src(url):
|
|
||||||
parsed_url = urlparse(url)
|
|
||||||
if parsed_url.path == '/watch' and parsed_url.query:
|
|
||||||
video_id = parse_qs(parsed_url.query).get('v', []) # type: ignore
|
|
||||||
if video_id:
|
|
||||||
return 'https://www.youtube-nocookie.com/embed/' + video_id[0] # type: ignore
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_news(json_resp):
|
|
||||||
result_list = []
|
|
||||||
|
|
||||||
for result in json_resp["results"]:
|
|
||||||
item = {
|
|
||||||
'url': result['url'],
|
|
||||||
'title': result['title'],
|
|
||||||
'content': result['description'],
|
|
||||||
}
|
|
||||||
if result['thumbnail'] != "null":
|
|
||||||
item['img_src'] = result['thumbnail']['src']
|
|
||||||
result_list.append(item)
|
|
||||||
|
|
||||||
return result_list
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_images(json_resp):
|
|
||||||
result_list = []
|
|
||||||
|
|
||||||
for result in json_resp["results"]:
|
|
||||||
item = {
|
|
||||||
'url': result['url'],
|
|
||||||
'title': result['title'],
|
|
||||||
'content': result['description'],
|
|
||||||
'template': 'images.html',
|
|
||||||
'img_format': result['properties']['format'],
|
|
||||||
'source': result['source'],
|
|
||||||
'img_src': result['properties']['url'],
|
|
||||||
}
|
|
||||||
result_list.append(item)
|
|
||||||
|
|
||||||
return result_list
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_videos(json_resp):
    """Build SearXNG video results from Brave's videos JSON payload."""
    results = []

    for row in json_resp["results"]:

        link = row['url']
        entry = {
            'url': link,
            'title': row['title'],
            'content': row['description'],
            'template': 'videos.html',
            'length': row['video']['duration'],
            'duration': row['video']['duration'],
        }

        # Brave serializes a missing thumbnail as the string "null"
        if row['thumbnail'] != "null":
            entry['thumbnail'] = row['thumbnail']['src']

        embed = _get_iframe_src(link)
        if embed:
            entry['iframe_src'] = embed

        results.append(entry)

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch :ref:`languages <brave languages>` and :ref:`regions <brave
    regions>` from Brave."""

    # pylint: disable=import-outside-toplevel

    import babel.languages
    from searx.locales import region_tag, language_tag
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    engine_traits.custom["ui_lang"] = {}

    headers = {
        'Accept-Encoding': 'gzip, deflate',
    }
    lang_map = {'no': 'nb'}  # norway

    # languages (UI)

    resp = get('https://search.brave.com/settings', headers=headers)

    if not resp.ok:  # type: ignore
        print("ERROR: response from Brave is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    for opt in dom.xpath('//div[@id="language-select"]//option'):

        ui_lang = opt.get('value')
        try:
            # values with a '-' are region tags, plain values are languages
            if '-' in ui_lang:
                sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
            else:
                sxng_tag = language_tag(babel.Locale.parse(ui_lang))

        except babel.UnknownLocaleError:
            print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
            continue

        known = engine_traits.custom["ui_lang"].get(sxng_tag)
        if known:
            if known != ui_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, known, ui_lang))
            continue
        engine_traits.custom["ui_lang"][sxng_tag] = ui_lang

    # search regions of brave

    engine_traits.all_locale = 'all'

    for country in dom.xpath('//div[@id="sidebar"]//ul/li/div[contains(@class, "country")]'):

        flag = country.xpath('./span[contains(@class, "flag")]')[0]
        # country_name = extract_text(flag.xpath('./following-sibling::*')[0])
        # the two-letter country tag is encoded in the flag's CSS class
        country_tag = re.search(r'flag-([^\s]*)\s', flag.xpath('./@class')[0]).group(1)  # type: ignore

        # add offical languages of the country ..
        for lang_tag in babel.languages.get_official_languages(country_tag, de_facto=True):
            lang_tag = lang_map.get(lang_tag, lang_tag)
            sxng_tag = region_tag(babel.Locale.parse('%s_%s' % (lang_tag, country_tag.upper())))
            # print("%-20s: %s <-- %s" % (country_name, country_tag, sxng_tag))

            known = engine_traits.regions.get(sxng_tag)
            if known:
                if known != country_tag:
                    print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, known, country_tag))
                continue
            engine_traits.regions[sxng_tag] = country_tag
|
|
||||||
@@ -1,124 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""BT4G_ (bt4g.com) is not a tracker and doesn't store any content and only
|
|
||||||
collects torrent metadata (such as file names and file sizes) and a magnet link
|
|
||||||
(torrent identifier).
|
|
||||||
|
|
||||||
This engine does not parse the HTML page because there is an API in XML (RSS).
|
|
||||||
The RSS feed provides fewer data like amount of seeders/leechers and the files
|
|
||||||
in the torrent file. It's a tradeoff for a "stable" engine as the XML from RSS
|
|
||||||
content will change way less than the HTML page.
|
|
||||||
|
|
||||||
.. _BT4G: https://bt4g.com/
|
|
||||||
|
|
||||||
Configuration
|
|
||||||
=============
|
|
||||||
|
|
||||||
The engine has the following additional settings:
|
|
||||||
|
|
||||||
- :py:obj:`bt4g_order_by`
|
|
||||||
- :py:obj:`bt4g_category`
|
|
||||||
|
|
||||||
With these options a SearXNG maintainer is able to configure **additional**
engines for specific torrent searches. For example an engine to search only for
Movies and sort the result list by the count of seeders.
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: bt4g.movie
|
|
||||||
engine: bt4g
|
|
||||||
shortcut: bt4gv
|
|
||||||
categories: video
|
|
||||||
bt4g_order_by: seeders
|
|
||||||
bt4g_category: 'movie'
|
|
||||||
|
|
||||||
Implementations
|
|
||||||
===============
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
from datetime import datetime
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
from searx.utils import get_torrent_size
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://bt4gprx.com',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'XML',
}

# engine dependent config
categories = ['files']
paging = True
time_range_support = True

# search-url
url = 'https://bt4gprx.com'
search_url = url + '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss'
bt4g_order_by = 'relevance'
"""Result list can be ordered by ``relevance`` (default), ``size``, ``seeders``
or ``time``.

.. hint::

   When *time_range* is activated, the results are always ordered by ``time``.
"""

bt4g_category = 'all'
"""BT4G offers the categories: ``all`` (default), ``audio``, ``movie``, ``doc``,
``app`` and ``other``.
"""
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble the BT4G request URL (RSS endpoint)."""

    # with an active time range the result list is always ordered by time
    order_by = 'time' if params['time_range'] else bt4g_order_by

    params['url'] = search_url.format(
        search_term=quote(query),
        order_by=order_by,
        category=bt4g_category,
        pageno=params['pageno'],
    )
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the RSS feed; the feed does not ship seeder/leecher counts."""
    results = []

    rss = etree.XML(resp.content)

    # return empty array if nothing is found
    if len(rss) == 0:
        return []

    for item in rss.xpath('./channel/item'):
        title = item.find("title").text
        link = item.find("guid").text
        # the <description> is '<br>' separated; the 2nd field is the size (e.g. '1.4GB')
        description = item.find("description").text.split('<br>')
        raw_size = description[1]
        # split into numeric value and unit, e.g. ['1.4', 'GB', '']
        size_parts = re.split(r"([A-Z]+)", raw_size)
        magnetlink = item.find("link").text
        pub_date = item.find("pubDate").text
        results.append(
            {
                'url': link,
                'title': title,
                'magnetlink': magnetlink,
                'seed': 'N/A',
                'leech': 'N/A',
                'filesize': get_torrent_size(size_parts[0], size_parts[1]),
                'publishedDate': datetime.strptime(pub_date, '%a,%d %b %Y %H:%M:%S %z'),
                'template': 'torrent.html',
            }
        )

    return results
|
|
||||||
@@ -1,89 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
BTDigg (Videos, Music, Files)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from lxml import html
|
|
||||||
from urllib.parse import quote, urljoin
|
|
||||||
from searx.utils import extract_text, get_torrent_size
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://btdig.com',
    "wikidata_id": 'Q4836698',
    "official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'},
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['files']
paging = True

# search-url / BTDigg pages are zero-based (see request())
url = 'https://btdig.com'
search_url = url + '/search?q={search_term}&p={pageno}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
def request(query, params):
    """Build the BTDigg search URL; BTDigg paging is zero-based."""
    page = params['pageno'] - 1
    params['url'] = search_url.format(search_term=quote(query), pageno=page)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
def response(resp):
    """Extract torrent results from the BTDigg result page."""
    results = []

    dom = html.fromstring(resp.text)

    search_res = dom.xpath('//div[@class="one_result"]')

    # return empty array if nothing is found
    if not search_res:
        return []

    # parse results
    for result in search_res:
        link = result.xpath('.//div[@class="torrent_name"]//a')[0]
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)

        excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
        content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
        # it is better to emit <br/> instead of |, but html tags are verboten
        content = content.strip().replace('\n', ' | ')
        content = ' '.join(content.split())

        # evaluate the size field only once, e.g. '1.4 GB' -> value + unit
        size_parts = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()
        filesize, filesize_multiplier = size_parts[0], size_parts[1]
        files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]

        # convert filesize to byte if possible
        filesize = get_torrent_size(filesize, filesize_multiplier)

        # convert files to int if possible; only a malformed number is
        # expected here, a bare ``except`` would also hide unrelated bugs
        try:
            files = int(files)
        except ValueError:
            files = None

        magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href']

        # append result
        results.append(
            {
                'url': href,
                'title': title,
                'content': content,
                'filesize': filesize,
                'files': files,
                'magnetlink': magnetlink,
                'template': 'torrent.html',
            }
        )

    # return results sorted by seeder
    return results
|
|
||||||
@@ -1,243 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""With *command engines* administrators can run engines to integrate arbitrary
|
|
||||||
shell commands.
|
|
||||||
|
|
||||||
.. attention::
|
|
||||||
|
|
||||||
When creating and enabling a ``command`` engine on a public instance, you
|
|
||||||
must be careful to avoid leaking private data.
|
|
||||||
|
|
||||||
The easiest solution is to limit the access by setting ``tokens`` as described
|
|
||||||
in section :ref:`private engines`. The engine base is flexible. Only your
|
|
||||||
imagination can limit the power of this engine (and maybe security concerns).
|
|
||||||
|
|
||||||
Configuration
|
|
||||||
=============
|
|
||||||
|
|
||||||
The following options are available:
|
|
||||||
|
|
||||||
``command``:
|
|
||||||
A comma separated list of the elements of the command. A special token
|
|
||||||
``{{QUERY}}`` tells where to put the search terms of the user. Example:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
['ls', '-l', '-h', '{{QUERY}}']
|
|
||||||
|
|
||||||
``delimiter``:
|
|
||||||
A mapping containing a delimiter ``char`` and the *titles* of each element in
|
|
||||||
``keys``.
|
|
||||||
|
|
||||||
``parse_regex``:
|
|
||||||
A dict containing the regular expressions for each result key.
|
|
||||||
|
|
||||||
``query_type``:
|
|
||||||
|
|
||||||
The expected type of user search terms. Possible values: ``path`` and
|
|
||||||
``enum``.
|
|
||||||
|
|
||||||
``path``:
|
|
||||||
Checks if the user provided path is inside the working directory. If not,
|
|
||||||
the query is not executed.
|
|
||||||
|
|
||||||
``enum``:
|
|
||||||
Is a list of allowed search terms. If the user submits something which is
|
|
||||||
not included in the list, the query returns an error.
|
|
||||||
|
|
||||||
``query_enum``:
|
|
||||||
A list containing allowed search terms if ``query_type`` is set to ``enum``.
|
|
||||||
|
|
||||||
``working_dir``:
|
|
||||||
The directory where the command has to be executed. Default: ``./``.
|
|
||||||
|
|
||||||
``result_separator``:
|
|
||||||
The character that separates results. Default: ``\\n``.
|
|
||||||
|
|
||||||
Example
|
|
||||||
=======
|
|
||||||
|
|
||||||
The example engine below can be used to find files with a specific name in the
|
|
||||||
configured working directory:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: find
|
|
||||||
engine: command
|
|
||||||
command: ['find', '.', '-name', '{{QUERY}}']
|
|
||||||
query_type: path
|
|
||||||
shortcut: fnd
|
|
||||||
delimiter:
|
|
||||||
chars: ' '
|
|
||||||
keys: ['line']
|
|
||||||
|
|
||||||
Implementations
|
|
||||||
===============
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
from os.path import commonpath, commonprefix, expanduser, isabs, realpath
from shlex import split as shlex_split
from subprocess import Popen, PIPE
from threading import Thread

from searx import logger
|
|
||||||
|
|
||||||
|
|
||||||
engine_type = 'offline'
|
|
||||||
paging = True
|
|
||||||
command = []
|
|
||||||
delimiter = {}
|
|
||||||
parse_regex = {}
|
|
||||||
query_type = ''
|
|
||||||
query_enum = []
|
|
||||||
environment_variables = {}
|
|
||||||
working_dir = realpath('.')
|
|
||||||
result_separator = '\n'
|
|
||||||
result_template = 'key-value.html'
|
|
||||||
timeout = 4.0
|
|
||||||
|
|
||||||
_command_logger = logger.getChild('command')
|
|
||||||
_compiled_parse_regex = {}
|
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings):
|
|
||||||
check_parsing_options(engine_settings)
|
|
||||||
|
|
||||||
if 'command' not in engine_settings:
|
|
||||||
raise ValueError('engine command : missing configuration key: command')
|
|
||||||
|
|
||||||
global command, working_dir, delimiter, parse_regex, environment_variables
|
|
||||||
|
|
||||||
command = engine_settings['command']
|
|
||||||
|
|
||||||
if 'working_dir' in engine_settings:
|
|
||||||
working_dir = engine_settings['working_dir']
|
|
||||||
if not isabs(engine_settings['working_dir']):
|
|
||||||
working_dir = realpath(working_dir)
|
|
||||||
|
|
||||||
if 'parse_regex' in engine_settings:
|
|
||||||
parse_regex = engine_settings['parse_regex']
|
|
||||||
for result_key, regex in parse_regex.items():
|
|
||||||
_compiled_parse_regex[result_key] = re.compile(regex, flags=re.MULTILINE)
|
|
||||||
if 'delimiter' in engine_settings:
|
|
||||||
delimiter = engine_settings['delimiter']
|
|
||||||
|
|
||||||
if 'environment_variables' in engine_settings:
|
|
||||||
environment_variables = engine_settings['environment_variables']
|
|
||||||
|
|
||||||
|
|
||||||
def search(query, params):
|
|
||||||
cmd = _get_command_to_run(query)
|
|
||||||
if not cmd:
|
|
||||||
return []
|
|
||||||
|
|
||||||
results = []
|
|
||||||
reader_thread = Thread(target=_get_results_from_process, args=(results, cmd, params['pageno']))
|
|
||||||
reader_thread.start()
|
|
||||||
reader_thread.join(timeout=timeout)
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def _get_command_to_run(query):
    """Build the argv list for the configured command.

    The user's search terms are tokenized shell-style and substituted for the
    special ``{{QUERY}}`` placeholder; every other element of ``command`` is
    kept verbatim.  Raises ``ValueError`` if the query violates ``query_type``.
    """
    query_args = shlex_split(query)
    __check_query_params(query_args)

    argv = []
    for token in command:
        argv.extend(query_args if token == '{{QUERY}}' else [token])
    return argv
|
|
||||||
|
|
||||||
|
|
||||||
def _get_results_from_process(results, cmd, pageno):
    """Run *cmd* and append the parsed results of the requested page to *results*.

    Executed in a reader thread (see ``search``).  The process output is split
    on ``result_separator``; an unterminated trailing fragment is kept in
    ``leftover`` and prepended to the next chunk.

    Raises ``RuntimeError`` if the process exits with a non-zero return code.
    """
    leftover = ''
    count = 0
    start, end = __get_results_limits(pageno)
    with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
        line = process.stdout.readline()
        while line:
            buf = leftover + line.decode('utf-8')
            raw_results = buf.split(result_separator)
            # the last element is either an unterminated fragment (keep it for
            # the next chunk) or an empty string; fix: reset `leftover`
            # unconditionally so a consumed fragment is not prepended twice
            leftover = raw_results[-1]
            raw_results = raw_results[:-1]

            for raw_result in raw_results:
                result = __parse_single_result(raw_result)
                if result is None:
                    # fix: logging args need a %s placeholder -- the former
                    # debug('skipped result:', raw_result) raised a formatting
                    # error inside the logging module
                    _command_logger.debug('skipped result: %s', raw_result)
                    continue

                # only keep results belonging to the requested page
                if start <= count <= end:
                    result['template'] = result_template
                    results.append(result)

                count += 1
                if end < count:
                    return results

            line = process.stdout.readline()

        return_code = process.wait(timeout=timeout)
        if return_code != 0:
            raise RuntimeError('non-zero return code when running command', cmd, return_code)
|
|
||||||
|
|
||||||
|
|
||||||
def __get_results_limits(pageno):
    """Return the inclusive ``(start, end)`` result indices for *pageno*.

    Pages hold 10 results each; page 1 covers indices 0..9.
    """
    first = (pageno - 1) * 10
    return first, first + 9
|
|
||||||
|
|
||||||
|
|
||||||
def __check_query_params(params):
    """Validate the user supplied query parameters against ``query_type``.

    - ``path``: the last parameter must resolve to a location inside
      ``working_dir``.
    - ``enum``: every parameter must be contained in ``query_enum``.

    Raises ``ValueError`` when the query is rejected.
    """
    if not query_type:
        return

    if query_type == 'path':
        query_path = realpath(expanduser(params[-1]))
        # fix: commonprefix() compares strings character-wise, not by path
        # component, so e.g. /srv/data-evil passed a /srv/data check;
        # commonpath() is component-aware
        if commonpath([query_path, working_dir]) != working_dir:
            raise ValueError('requested path is outside of configured working directory')
    elif query_type == 'enum' and len(query_enum) > 0:
        for param in params:
            if param not in query_enum:
                raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
|
|
||||||
|
|
||||||
|
|
||||||
def check_parsing_options(engine_settings):
    """Checks if delimiter based parsing or regex parsing is configured correctly.

    Exactly one of ``delimiter`` / ``parse_regex`` must be present, and a
    ``delimiter`` mapping must provide both ``chars`` and ``keys``.

    Raises ``ValueError`` on an invalid configuration.
    """
    if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings:
        raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
    if 'delimiter' in engine_settings and 'parse_regex' in engine_settings:
        raise ValueError('failed to init settings for parsing lines: too many settings')

    if 'delimiter' in engine_settings:
        if 'chars' not in engine_settings['delimiter'] or 'keys' not in engine_settings['delimiter']:
            # fix: the former bare `raise ValueError` gave no hint what is wrong
            raise ValueError('failed to init settings for parsing lines: delimiter needs chars and keys')
|
|
||||||
|
|
||||||
|
|
||||||
def __parse_single_result(raw_result):
    """Parses command line output based on configuration"""

    result = {}

    if delimiter:
        keys = delimiter['keys']
        elements = raw_result.split(delimiter['chars'], maxsplit=len(keys) - 1)
        # a line that does not yield one element per key is dropped
        if len(elements) != len(keys):
            return {}
        result.update(zip(keys, elements))

    if parse_regex:
        for result_key, regex in _compiled_parse_regex.items():
            found = regex.search(raw_result)
            # every configured regex must match, otherwise the line is dropped
            if not found:
                return {}
            result[result_key] = raw_result[found.start() : found.end()]

    return result
|
|
||||||
@@ -1,116 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""CORE (science)
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
from searx.exceptions import SearxEngineAPIException
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://core.ac.uk',
|
|
||||||
"wikidata_id": 'Q22661180',
|
|
||||||
"official_api_documentation": 'https://core.ac.uk/documentation/api/',
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": True,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
categories = ['science', 'scientific publications']
|
|
||||||
paging = True
|
|
||||||
nb_per_page = 10
|
|
||||||
|
|
||||||
api_key = 'unset'
|
|
||||||
|
|
||||||
base_url = 'https://core.ac.uk:443/api-v2/search/'
|
|
||||||
search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble the CORE API search request.

    Raises ``SearxEngineAPIException`` when no API key is configured.
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    search_path = search_string.format(
        query=urlencode({'q': query}),
        nb_per_page=nb_per_page,
        page=params['pageno'],
        apikey=api_key,
    )
    params['url'] = '{}{}'.format(base_url, search_path)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the CORE API JSON reply into 'paper.html' results."""
    results = []
    json_data = resp.json()

    for result in json_data['data']:
        source = result['_source']

        # pick the best available URL -- order of preference:
        # 1. first entry of `urls`, upgraded to https
        url = None
        if source.get('urls'):
            url = source['urls'][0].replace('http://', 'https://', 1)

        if url is None and source.get('doi'):
            # 2. use the DOI reference
            url = 'https://doi.org/' + source['doi']

        if url is None and source.get('downloadUrl'):
            # 3. use the downloadUrl
            url = source['downloadUrl']

        if url is None and source.get('identifiers'):
            # 4. try to find an ark id, see
            # https://www.wikidata.org/wiki/Property:P8091
            # and https://en.wikipedia.org/wiki/Archival_Resource_Key
            arkids = [
                identifier[5:]  # 5 is the length of "ark:/"
                for identifier in source.get('identifiers')
                if isinstance(identifier, str) and identifier.startswith('ark:/')
            ]
            if len(arkids) > 0:
                url = 'https://n2t.net/' + arkids[0]

        # a result without any usable URL is dropped
        if url is None:
            continue

        # timestamps come in milliseconds (hence the / 1000)
        publishedDate = None
        time = source['publishedDate'] or source['depositedDate']
        if time:
            publishedDate = datetime.fromtimestamp(time / 1000)

        # sometimes the 'title' is None / filter None values
        journals = [j['title'] for j in (source.get('journals') or []) if j['title']]

        publisher = source['publisher']
        if publisher:
            # some publisher names are wrapped in stray single quotes
            publisher = source['publisher'].strip("'")

        results.append(
            {
                'template': 'paper.html',
                'title': source['title'],
                'url': url,
                'content': source['description'] or '',
                # 'comments': '',
                'tags': source['topics'],
                'publishedDate': publishedDate,
                'type': (source['types'] or [None])[0],
                'authors': source['authors'],
                'editor': ', '.join(source['contributors'] or []),
                'publisher': publisher,
                'journal': ', '.join(journals),
                # 'volume': '',
                # 'pages' : '',
                # 'number': '',
                'doi': source['doi'],
                'issn': [x for x in [source.get('issn')] if x],
                'isbn': [x for x in [source.get('isbn')] if x],  # exists in the rawRecordXml
                'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'),
            }
        )

    return results
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Semantic Scholar (Science)
|
|
||||||
"""
|
|
||||||
# pylint: disable=use-dict-literal
|
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from searx.utils import html_to_text
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.crossref.org/',
|
|
||||||
"wikidata_id": 'Q5188229',
|
|
||||||
"official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
categories = ['science', 'scientific publications']
|
|
||||||
paging = True
|
|
||||||
search_url = 'https://api.crossref.org/works'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the crossref REST query URL (20 results per page)."""
    offset = 20 * (params['pageno'] - 1)
    params['url'] = search_url + '?' + urlencode({'query': query, 'offset': offset})
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse crossref REST replies into 'paper.html' results."""
    res = resp.json()
    results = []
    for record in res['message']['items']:
        record_type = record['type']
        if record_type == 'book-chapter':
            # use the book title as main title and append the chapter title in
            # parentheses -- unless both titles are effectively identical
            title = record['container-title'][0]
            if record['title'][0].lower().strip() != title.lower().strip():
                title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
            journal = None
        else:
            title = html_to_text(record['title'][0])
            journal = record.get('container-title', [None])[0]

        # prefer the primary resource URL, fall back to the generic record URL
        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
        # plain `isbn` list if present, otherwise collect from `isbn-type`
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record['type'],
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results
|
|
||||||
@@ -1,56 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Currency convert (DuckDuckGo)
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
# about
|
|
||||||
about = {
|
|
||||||
"website": 'https://duckduckgo.com/',
|
|
||||||
"wikidata_id": 'Q12805',
|
|
||||||
"official_api_documentation": 'https://duckduckgo.com/api',
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSONP',
|
|
||||||
"description": "Service from DuckDuckGo.",
|
|
||||||
}
|
|
||||||
|
|
||||||
engine_type = 'online_currency'
|
|
||||||
categories = []
|
|
||||||
base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
|
|
||||||
weight = 100
|
|
||||||
|
|
||||||
https_support = True
|
|
||||||
|
|
||||||
|
|
||||||
def request(_query, params):
    """Build the DuckDuckGo currency-spice request URL.

    The search terms themselves are unused; the currency pair comes from the
    ``from`` / ``to`` request parameters.
    """
    src, dst = params['from'], params['to']
    params['url'] = base_url.format(src, dst)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the JSONP currency reply into a single 'answer' result."""
    results = []

    # strip the JSONP wrapper: drop the first line and the trailing
    # characters around the payload to get plain JSON
    text = resp.text
    payload = text[text.find('\n') + 1 : text.rfind('\n') - 2]
    try:
        rate = float(json.loads(payload)['conversion']['converted-amount'])
    except ValueError:
        return results

    sp = resp.search_params
    answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
        sp['amount'],
        sp['from'],
        sp['amount'] * rate,
        sp['to'],
        rate,
        sp['from_name'],
        sp['to_name'],
    )

    url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'.format(sp['from'].upper(), sp['to'])

    results.append({'answer': answer, 'url': url})

    return results
|
|
||||||
@@ -1,252 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Dailymotion (Videos)
|
|
||||||
~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
.. _REST GET: https://developers.dailymotion.com/tools/
|
|
||||||
.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
|
|
||||||
.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
|
|
||||||
.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
import time
|
|
||||||
import babel
|
|
||||||
|
|
||||||
from searx.network import get, raise_for_httperror # see https://github.com/searxng/searxng/issues/762
|
|
||||||
from searx.utils import html_to_text
|
|
||||||
from searx.exceptions import SearxEngineAPIException
|
|
||||||
from searx.locales import region_tag, language_tag
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
# about
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.dailymotion.com',
|
|
||||||
"wikidata_id": 'Q769222',
|
|
||||||
"official_api_documentation": 'https://www.dailymotion.com/developer',
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['videos']
|
|
||||||
paging = True
|
|
||||||
number_of_results = 10
|
|
||||||
|
|
||||||
time_range_support = True
|
|
||||||
time_delta_dict = {
|
|
||||||
"day": timedelta(days=1),
|
|
||||||
"week": timedelta(days=7),
|
|
||||||
"month": timedelta(days=31),
|
|
||||||
"year": timedelta(days=365),
|
|
||||||
}
|
|
||||||
|
|
||||||
safesearch = True
|
|
||||||
safesearch_params = {
|
|
||||||
2: {'is_created_for_kids': 'true'},
|
|
||||||
1: {'is_created_for_kids': 'true'},
|
|
||||||
0: {},
|
|
||||||
}
|
|
||||||
"""True if this video is "Created for Kids" / intends to target an audience
|
|
||||||
under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
|
|
||||||
"""
|
|
||||||
|
|
||||||
family_filter_map = {
|
|
||||||
2: 'true',
|
|
||||||
1: 'true',
|
|
||||||
0: 'false',
|
|
||||||
}
|
|
||||||
"""By default, the family filter is turned on. Setting this parameter to
|
|
||||||
``false`` will stop filtering-out explicit content from searches and global
|
|
||||||
contexts (``family_filter`` in `Global API Parameters`_ ).
|
|
||||||
"""
|
|
||||||
|
|
||||||
result_fields = [
|
|
||||||
'allow_embed',
|
|
||||||
'description',
|
|
||||||
'title',
|
|
||||||
'created_time',
|
|
||||||
'duration',
|
|
||||||
'url',
|
|
||||||
'thumbnail_360_url',
|
|
||||||
'id',
|
|
||||||
]
|
|
||||||
"""`Fields selection`_, by default, a few fields are returned. To request more
|
|
||||||
specific fields, the ``fields`` parameter is used with the list of fields
|
|
||||||
SearXNG needs in the response to build a video result list.
|
|
||||||
"""
|
|
||||||
|
|
||||||
search_url = 'https://api.dailymotion.com/videos?'
|
|
||||||
"""URL to retrieve a list of videos.
|
|
||||||
|
|
||||||
- `REST GET`_
|
|
||||||
- `Global API Parameters`_
|
|
||||||
- `Video filters API`_
|
|
||||||
"""
|
|
||||||
|
|
||||||
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
|
|
||||||
"""URL template to embed video in SearXNG's result list."""
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble the dailymotion video-search request.

    Returns ``False`` (no request) for an empty query.
    """
    if not query:
        return False

    # map the SearXNG locale to dailymotion's region / language codes
    eng_region: str = traits.get_region(params['searxng_locale'], 'en_US')  # type: ignore
    eng_lang = traits.get_language(params['searxng_locale'], 'en')

    args = {
        'search': query,
        'family_filter': family_filter_map.get(params['safesearch'], 'false'),
        'thumbnail_ratio': 'original',  # original|widescreen|square
        # https://developers.dailymotion.com/api/#video-filters
        'languages': eng_lang,
        'page': params['pageno'],
        'password_protected': 'false',
        'private': 'false',
        'sort': 'relevance',
        'limit': number_of_results,
        'fields': ','.join(result_fields),
    }

    args.update(safesearch_params.get(params['safesearch'], {}))

    # Don't add localization and country arguments if the user does select a
    # language (:de, :en, ..)

    if len(params['searxng_locale'].split('-')) > 1:
        # https://developers.dailymotion.com/api/#global-parameters
        args['localization'] = eng_region
        args['country'] = eng_region.split('_')[1]
        # Insufficient rights for the `ams_country' parameter of route `GET /videos'
        # 'ams_country': eng_region.split('_')[1],

    # map the selected time range to a `created_after` timestamp
    time_delta = time_delta_dict.get(params["time_range"])
    if time_delta:
        created_after = datetime.now() - time_delta
        args['created_after'] = datetime.timestamp(created_after)

    query_str = urlencode(args)
    params['url'] = search_url + query_str

    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Parse the dailymotion JSON reply into 'videos.html' results.

    Raises ``SearxEngineAPIException`` when the API reports an error.
    """
    results = []

    search_res = resp.json()

    # check for an API error
    if 'error' in search_res:
        raise SearxEngineAPIException(search_res['error'].get('message'))

    raise_for_httperror(resp)

    # parse results
    for res in search_res.get('list', []):

        title = res['title']
        url = res['url']

        # truncate long descriptions for the result list
        content = html_to_text(res['description'])
        if len(content) > 300:
            content = content[:300] + '...'

        publishedDate = datetime.fromtimestamp(res['created_time'], None)

        # format the duration as H:M:S when it reaches an hour, else M:S
        length = time.gmtime(res.get('duration'))
        if length.tm_hour:
            length = time.strftime("%H:%M:%S", length)
        else:
            length = time.strftime("%M:%S", length)

        thumbnail = res['thumbnail_360_url']
        thumbnail = thumbnail.replace("http://", "https://")

        item = {
            'template': 'videos.html',
            'url': url,
            'title': title,
            'content': content,
            'publishedDate': publishedDate,
            'length': length,
            'thumbnail': thumbnail,
        }

        # HINT: no matter what the value is, without API token videos can't be
        # shown embedded
        if res['allow_embed']:
            item['iframe_src'] = iframe_src.format(video_id=res['id'])

        results.append(item)

    # return results
    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch locales & languages from dailymotion.

    Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
    There are duplications in the locale codes returned from Dailymotion which
    can be ignored::

        en_EN --> en_GB, en_US
        ar_AA --> ar_EG, ar_AE, ar_SA

    The language list `api/languages <https://api.dailymotion.com/languages>`_
    contains over 7000 *languages* codes (see PR1071_).  We use only those
    language codes that are used in the locales.

    .. _PR1071: https://github.com/searxng/searxng/pull/1071

    """

    resp = get('https://api.dailymotion.com/locales')
    if not resp.ok:  # type: ignore
        print("ERROR: response from dailymotion/locales is not OK.")

    for item in resp.json()['list']:  # type: ignore
        eng_tag = item['locale']
        # skip the known duplicated locale codes (see docstring)
        if eng_tag in ('en_EN', 'ar_AA'):
            continue
        try:
            sxng_tag = region_tag(babel.Locale.parse(eng_tag))
        except babel.UnknownLocaleError:
            print("ERROR: item unknown --> %s" % item)
            continue

        # keep the first mapping; report conflicting duplicates
        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag

    # language part of every region we kept; used to filter the huge language list
    locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]

    resp = get('https://api.dailymotion.com/languages')
    if not resp.ok:  # type: ignore
        print("ERROR: response from dailymotion/languages is not OK.")

    for item in resp.json()['list']:  # type: ignore
        eng_tag = item['code']
        if eng_tag in locale_lang_list:
            sxng_tag = language_tag(babel.Locale.parse(eng_tag))
            engine_traits.languages[sxng_tag] = eng_tag
|
|
||||||
@@ -1,62 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Deepl translation engine"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://deepl.com',
|
|
||||||
"wikidata_id": 'Q43968444',
|
|
||||||
"official_api_documentation": 'https://www.deepl.com/docs-api',
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": True,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
engine_type = 'online_dictionary'
|
|
||||||
categories = ['general']
|
|
||||||
|
|
||||||
url = 'https://api-free.deepl.com/v2/translate'
|
|
||||||
api_key = None
|
|
||||||
|
|
||||||
|
|
||||||
def request(_query, params):
    '''pre-request callback

    params<dict>:

    - ``method`` : POST/GET
    - ``headers``: {}
    - ``data``: {} # if method == POST
    - ``url``: ''
    - ``category``: 'search category'
    - ``pageno``: 1 # number of the requested page
    '''
    params['method'] = 'POST'
    params['url'] = url
    params['data'] = {
        'auth_key': api_key,
        'text': params['query'],
        'target_lang': params['to_lang'][1],
    }

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Render every returned translation as a ``<dd>`` entry of one
    definition-list infobox."""
    data = loads(resp.text)
    entries = ''.join(f"<dd>{t['text']}</dd>" for t in data['translations'])
    return [
        {
            'infobox': 'Deepl',
            'content': '<dl>' + entries + '</dl>',
        }
    ]
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
Deezer (Music)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
# about
|
|
||||||
about = {
|
|
||||||
"website": 'https://deezer.com',
|
|
||||||
"wikidata_id": 'Q602243',
|
|
||||||
"official_api_documentation": 'https://developers.deezer.com/',
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
# engine dependent config
|
|
||||||
categories = ['music']
|
|
||||||
paging = True
|
|
||||||
|
|
||||||
# search-url
|
|
||||||
url = 'https://api.deezer.com/'
|
|
||||||
search_url = url + 'search?{query}&index={offset}'
|
|
||||||
iframe_src = "https://www.deezer.com/plugins/player?type=tracks&id={audioid}"
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
def request(query, params):
    """Build the Deezer search URL (25 results per page)."""
    page_offset = 25 * (params['pageno'] - 1)
    params['url'] = search_url.format(query=urlencode({'q': query}), offset=page_offset)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
def response(resp):
    """Extract track results from the Deezer JSON reply."""
    results = []
    search_res = loads(resp.text)

    for item in search_res.get('data', []):
        # only tracks are handled; albums, artists, ... are skipped
        if item['type'] != 'track':
            continue

        link = item['link']
        if link.startswith('http://'):
            link = 'https' + link[4:]

        content = '{} - {} - {}'.format(item['artist']['name'], item['album']['title'], item['title'])

        results.append(
            {
                'url': link,
                'title': item['title'],
                'iframe_src': iframe_src.format(audioid=item['id']),
                'content': content,
            }
        )

    return results
|
|
||||||
@@ -1,73 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Within this module we implement a *demo offline engine*. Do not look to
|
|
||||||
close to the implementation, its just a simple example. To get in use of this
|
|
||||||
*demo* engine add the following entry to your engines list in ``settings.yml``:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: my offline engine
|
|
||||||
engine: demo_offline
|
|
||||||
shortcut: demo
|
|
||||||
disabled: false
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
engine_type = 'offline'
|
|
||||||
categories = ['general']
|
|
||||||
disabled = True
|
|
||||||
timeout = 2.0
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"wikidata_id": None,
|
|
||||||
"official_api_documentation": None,
|
|
||||||
"use_official_api": False,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
# if there is a need for globals, use a leading underline
|
|
||||||
_my_offline_engine = None
|
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings=None):
    """Initialization of the (offline) engine.  Builds the demo data -- a small
    JSON string seeded with the engine name -- and stores it in a module
    global for ``search`` to consume.
    """
    global _my_offline_engine  # pylint: disable=global-statement

    name = engine_settings.get('name')
    _my_offline_engine = (
        '[ {"value": "%s"}'
        ', {"value":"first item"}'
        ', {"value":"second item"}'
        ', {"value":"third item"}'
        ']' % name
    )
|
|
||||||
|
|
||||||
|
|
||||||
def search(query, request_params):
    """Query (offline) engine and return results.  The demo ignores the
    'query' term and maps every row of the demo data to a key-value result;
    a real engine would filter its local data by the query.
    """
    rows = json.loads(_my_offline_engine)
    return [
        {
            'query': query,
            'language': request_params['searxng_locale'],
            'value': row.get("value"),
            # choose a result template or comment out to use the *default*
            'template': 'key-value.html',
        }
        for row in rows
    ]
|
|
||||||
@@ -1,100 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Within this module we implement a *demo online engine*. Do not look to
|
|
||||||
close to the implementation, its just a simple example which queries `The Art
|
|
||||||
Institute of Chicago <https://www.artic.edu>`_
|
|
||||||
|
|
||||||
To get in use of this *demo* engine add the following entry to your engines
|
|
||||||
list in ``settings.yml``:
|
|
||||||
|
|
||||||
.. code:: yaml
|
|
||||||
|
|
||||||
- name: my online engine
|
|
||||||
engine: demo_online
|
|
||||||
shortcut: demo
|
|
||||||
disabled: false
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
engine_type = 'online'
|
|
||||||
send_accept_language_header = True
|
|
||||||
categories = ['general']
|
|
||||||
disabled = True
|
|
||||||
timeout = 2.0
|
|
||||||
categories = ['images']
|
|
||||||
paging = True
|
|
||||||
page_size = 20
|
|
||||||
|
|
||||||
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
|
|
||||||
image_api = 'https://www.artic.edu/iiif/2/'
|
|
||||||
|
|
||||||
about = {
|
|
||||||
"website": 'https://www.artic.edu',
|
|
||||||
"wikidata_id": 'Q239303',
|
|
||||||
"official_api_documentation": 'http://api.artic.edu/docs/',
|
|
||||||
"use_official_api": True,
|
|
||||||
"require_api_key": False,
|
|
||||||
"results": 'JSON',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# if there is a need for globals, use a leading underline
|
|
||||||
_my_online_engine = None
|
|
||||||
|
|
||||||
|
|
||||||
def init(engine_settings):
    """Set up the (online) engine.

    Drop this function entirely if the engine needs no initialization.
    """
    global _my_online_engine  # pylint: disable=global-statement
    _my_online_engine = engine_settings.get('name')
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble the ``params`` of the online request.  In this example a URL
    fetching images from `artic.edu <https://artic.edu>`__ is built.
    """
    query_args = {
        'q': query,
        'page': params['pageno'],
        'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
        'limit': page_size,
    }
    params['url'] = search_api + urlencode(query_args)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Extract the image results from the `api.artic.edu
    <https://artic.edu>`__ JSON response; artworks without an image are
    skipped.
    """
    results = []
    json_data = loads(resp.text)

    for item in json_data['data']:
        # artworks without an attached image cannot be rendered
        if not item['image_id']:
            continue
        results.append(
            {
                'url': 'https://artic.edu/artworks/%(id)s' % item,
                'title': item['title'] + " (%(date_display)s) // %(artist_display)s" % item,
                'content': item['medium_display'],
                'author': ', '.join(item['artist_titles']),
                'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % item,
                'img_format': item['dimensions'],
                'template': 'images.html',
            }
        )

    return results
|
|
||||||
@@ -1,81 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
Deviantart (Images)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
# about
|
|
||||||
# about — engine metadata shown by SearXNG
about = {
    "website": 'https://www.deviantart.com/',
    "wikidata_id": 'Q46523',
    "official_api_documentation": 'https://www.deviantart.com/developers/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['images']
paging = True
time_range_support = True

# maps SearXNG's time-range names onto deviantart's ``order`` URL parameter
time_range_dict = {
    'day': 'popular-24-hours',
    'week': 'popular-1-week',
    'month': 'popular-1-month',
    'year': 'most-recent',
}

# search-url
base_url = 'https://www.deviantart.com'
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the search URL, e.g.
    ``https://www.deviantart.com/search/deviations?page=5&q=foo``."""

    url_args = {
        'page': params['pageno'],
        'q': query,
    }
    # a known time range selects deviantart's matching sort order
    if params['time_range'] in time_range_dict:
        url_args['order'] = time_range_dict[params['time_range']]

    params['url'] = base_url + '/search/deviations?' + urlencode(url_args)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Scrape image results out of the deviantart HTML page."""
    results = []
    dom = html.fromstring(resp.text)

    for row in dom.xpath('//div[contains(@data-hook, "content_row")]'):
        for cell in row.xpath('./div'):

            link = cell.xpath('.//a[@data-hook="deviation_link"]')[0]
            noscript = link.xpath('.//noscript')

            # the real <img> may be wrapped in <noscript> (lazy loading)
            if noscript:
                img_nodes = noscript[0].xpath('.//img')
            else:
                img_nodes = link.xpath('.//img')
            if not img_nodes:
                continue

            img = img_nodes[0]
            results.append(
                {
                    'template': 'images.html',
                    'url': link.attrib.get('href'),
                    'img_src': img.attrib.get('src'),
                    'title': img.attrib.get('alt'),
                }
            )

    return results
|
|
||||||
@@ -1,60 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
Dictzone
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urljoin
|
|
||||||
from lxml import html
|
|
||||||
from searx.utils import eval_xpath
|
|
||||||
|
|
||||||
# about
|
|
||||||
# about — engine metadata
about = {
    "website": 'https://dictzone.com/',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# 'online_dictionary' engines receive from_lang / to_lang / query in params
engine_type = 'online_dictionary'
categories = ['general']
# URL template filled in request(); NOTE(review): from_lang/to_lang appear to
# be language *names* (params[...][2]) — confirm against the dictionary processor
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

# XPath selecting the rows of dictzone's result table
results_xpath = './/table[@id="r"]/tr'
https_support = True
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Fill the dictionary URL template for the requested language pair."""
    params['url'] = url.format(
        from_lang=params['from_lang'][2],
        to_lang=params['to_lang'][2],
        query=params['query'],
    )
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse translation rows from the dictzone result table.

    One result per table row; the translations of a source term are joined
    into the ``content`` field.
    """
    results = []
    dom = html.fromstring(resp.text)

    # the first row of the table is the header -> skip it
    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
        try:
            from_result, to_results_raw = eval_xpath(result, './td')
        except ValueError:
            # row does not consist of exactly two <td> cells -> not a
            # result row (the original used a bare ``except:`` here)
            continue

        to_results = []
        for to_result in eval_xpath(to_results_raw, './p/a'):
            t = to_result.text_content()
            if t.strip():
                # re-use the already extracted text instead of calling
                # text_content() a second time
                to_results.append(t)

        results.append(
            {
                # a per-row query fragment keeps the result URLs distinct
                'url': urljoin(str(resp.url), '?%d' % k),
                'title': from_result.text_content(),
                'content': '; '.join(to_results),
            }
        )

    return results
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
DigBT (Videos, Music, Files)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urljoin
|
|
||||||
from lxml import html
|
|
||||||
from searx.utils import extract_text, get_torrent_size
|
|
||||||
|
|
||||||
# about
|
|
||||||
# about — engine metadata
about = {
    "website": 'https://digbt.org',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['videos', 'music', 'files']
paging = True

URL = 'https://digbt.org'
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
# word positions of the size value / size unit inside a result's "tail" text
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the paged DigBT search URL."""
    page = params['pageno']
    params['url'] = SEARCH_URL.format(query=query, pageno=page)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Extract torrent results from the DigBT HTML page."""
    dom = html.fromstring(resp.text)
    search_res = dom.xpath('.//td[@class="x-item"]')

    if not search_res:
        # idiomatic empty-list literal (original used ``list()``)
        return []

    results = []
    for result in search_res:
        url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
        title = extract_text(result.xpath('.//a[@title]'))
        content = extract_text(result.xpath('.//div[@class="files"]'))
        # the "tail" text carries size value and unit at fixed word positions
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
        magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'filesize': filesize,
                'magnetlink': magnetlink,
                # DigBT does not expose seeder/leecher counts
                'seed': 'N/A',
                'leech': 'N/A',
                'template': 'torrent.html',
            }
        )

    return results
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""Docker Hub (IT)
|
|
||||||
|
|
||||||
"""
|
|
||||||
# pylint: disable=use-dict-literal
|
|
||||||
|
|
||||||
from json import loads
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from dateutil import parser
|
|
||||||
|
|
||||||
# about — engine metadata
about = {
    "website": 'https://hub.docker.com',
    "wikidata_id": 'Q100769064',
    "official_api_documentation": 'https://docs.docker.com/registry/spec/api/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['it']  # optional
paging = True

base_url = "https://hub.docker.com/"
# search endpoint; the page size is fixed at 25 results per request
search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25"
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the Docker Hub search API request."""
    query_str = urlencode({'q': query, 'page': params["pageno"]})
    params['url'] = search_url.format(query=query_str)
    # select v3 of the (undocumented) search API
    params["headers"]["Search-Version"] = "v3"
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    '''post-response callback

    resp: requests response object
    '''
    results = []
    body = loads(resp.text)

    # `summaries` may be `null` or missing in the JSON body
    for item in body.get("summaries") or []:
        result = {}

        # official/"store" images live under /_/<slug>, community images
        # under /r/<slug>
        filter_type = item.get("filter_type")
        is_official = filter_type in ["store", "official"]

        if is_official:
            result["url"] = base_url + "_/" + item.get('slug', "")
        else:
            result["url"] = base_url + "r/" + item.get('slug', "")

        result["title"] = item.get("name")
        result["content"] = item.get("short_description")
        result["publishedDate"] = parser.parse(item.get("updated_at") or item.get("created_at"))
        # guard against a missing or null `logo_url` entry (the original
        # indexed item["logo_url"] directly and could raise)
        logo_url = item.get("logo_url") or {}
        result["thumbnail"] = logo_url.get("large") or logo_url.get("small")
        results.append(result)

    return results
|
|
||||||
@@ -1,86 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
Doku Wiki
|
|
||||||
"""
|
|
||||||
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
from lxml.html import fromstring
|
|
||||||
from searx.utils import extract_text, eval_xpath
|
|
||||||
|
|
||||||
# about
|
|
||||||
# about — engine metadata
about = {
    "website": 'https://www.dokuwiki.org/',
    "wikidata_id": 'Q851864',
    "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
paging = False
number_of_results = 5

# search-url
# Doku is OpenSearch compatible
# NOTE(review): points at a local instance; meant to be overridden in settings.yml
base_url = 'http://localhost:8090'
search_url = (
    # fmt: off
    '/?do=search'
    '&{query}'
    # fmt: on
)
# TODO '&startRecord={offset}'
# TODO '&maximumRecords={limit}'
|
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
|
||||||
# do search-request
def request(query, params):
    """Assemble the DokuWiki search request URL."""
    query_args = urlencode({'id': query})
    params['url'] = base_url + search_url.format(query=query_args)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
# get response from search-request
|
|
||||||
# get response from search-request
def response(resp):
    """Parse DokuWiki search output.

    Two sections are scraped: the "Quickhits" list (title-only matches) and
    the full-text search results, a definition list where each <dt> carries
    the link/title and the following <dd> carries the snippet.
    """
    results = []
    doc = fromstring(resp.text)

    # parse results
    # Quickhits
    for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
        try:
            res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
        except IndexError:
            # list item without a wiki link (original used a bare ``except:``)
            continue

        if not res_url:
            continue

        title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))

        # append result
        results.append({'title': title, 'content': "", 'url': base_url + res_url})

    # Search results: a result is appended when its <dd> (snippet) is seen,
    # re-using url/title remembered from the preceding <dt>
    for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
        try:
            if r.tag == "dt":
                res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
                title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
            elif r.tag == "dd":
                content = extract_text(eval_xpath(r, '.'))

                # append result
                results.append({'title': title, 'content': content, 'url': base_url + res_url})
        except (IndexError, UnboundLocalError):
            # malformed entry, or a <dd> before any <dt> set url/title
            # (original used a bare ``except:``)
            continue

        # NOTE: the original repeated ``if not res_url: continue`` here; at
        # the end of the loop body it was a no-op when ``res_url`` was bound
        # and an uncaught UnboundLocalError when it was not, so it is removed.

    # return results
    return results
|
|
||||||
@@ -1,437 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
DuckDuckGo Lite
|
|
||||||
~~~~~~~~~~~~~~~
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
import re
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
import json
|
|
||||||
import babel
|
|
||||||
import lxml.html
|
|
||||||
|
|
||||||
from searx import (
|
|
||||||
locales,
|
|
||||||
redislib,
|
|
||||||
external_bang,
|
|
||||||
)
|
|
||||||
from searx.utils import (
|
|
||||||
eval_xpath,
|
|
||||||
eval_xpath_getindex,
|
|
||||||
extract_text,
|
|
||||||
)
|
|
||||||
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
|
||||||
from searx import redisdb
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
from searx.exceptions import SearxEngineAPIException
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
# about — engine metadata
about = {
    "website": 'https://lite.duckduckgo.com/lite/',
    "wikidata_id": 'Q12805',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

send_accept_language_header = True
"""DuckDuckGo-Lite tries to guess user's prefered language from the HTTP
``Accept-Language``. Optional the user can select a region filter (but not a
language).
"""

# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True  # user can't select but the results are filtered

url = 'https://lite.duckduckgo.com/lite/'
# url_ping = 'https://duckduckgo.com/t/sl_l'

# maps SearXNG's time-range names onto DDG's ``df`` parameter values
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
# hidden-form defaults; refreshed from every response in response()
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
|
||||||
|
|
||||||
|
|
||||||
def cache_vqd(query, value):
    """Store the ``vqd`` value belonging to *query* in the redis cache.

    The vqd value depends on the query string and is needed for the follow up
    pages or the images loaded by a XMLHttpRequest:

    - DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
    - DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...`

    """
    redis_client = redisdb.client()
    if not redis_client:
        return
    logger.debug("cache vqd value: %s", value)
    cache_key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
    # keep the value for ten minutes
    redis_client.set(cache_key, value, ex=600)
|
|
||||||
|
|
||||||
|
|
||||||
def get_vqd(query, headers):
    """Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
    response.

    """
    value = None
    c = redisdb.client()
    if c:
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
        value = c.get(key)
        if value:
            value = value.decode('utf-8')
            logger.debug("re-use cached vqd value: %s", value)
            return value

    # no cache hit -> scrape the value out of DDG's HTML page
    query_url = 'https://duckduckgo.com/?q={query}&atb=v290-5'.format(query=urlencode({'q': query}))
    res = get(query_url, headers=headers)
    content = res.text  # type: ignore
    if content.find('vqd="') == -1:
        raise SearxEngineAPIException('Request failed')
    value = content[content.find('vqd="') + 5 :]
    # FIX: the value is opened by a double quote, so it must also be cut at
    # the closing double quote; the original cut at a single quote and could
    # return far too much of the page.
    value = value[: value.find('"')]
    logger.debug("new vqd value: %s", value)
    cache_vqd(query, value)
    return value
|
|
||||||
|
|
||||||
|
|
||||||
def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
    """Get DuckDuckGo's language identifier from SearXNG's locale.

    DuckDuckGo defines its lanaguages by region codes (see
    :py:obj:`fetch_traits`).

    To get region and language of a DDG service use:

    .. code: python

       eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
       eng_lang = get_ddg_lang(traits, params['searxng_locale'])

    It might confuse, but the ``l`` value of the cookie is what SearXNG calls
    the *region*:

    .. code:: python

        # !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
        params['cookies']['ad'] = eng_lang
        params['cookies']['ah'] = eng_region
        params['cookies']['l'] = eng_region

    .. hint::

       `DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
       selection to the user, only a region can be selected by the user
       (``eng_region`` from the example above). DDG-lite stores the selected
       region in a cookie::

         params['cookies']['kl'] = eng_region  # 'ar-es'

    """
    # fall back to the generic language mapping when there is no special
    # lang_region entry for this locale
    fallback = eng_traits.get_language(sxng_locale, default)
    return eng_traits.custom['lang_region'].get(sxng_locale, fallback)  # type: ignore
|
|
||||||
|
|
||||||
|
|
||||||
# DDG region tags that do not follow the generic "<territory>-<lang>" pattern
# handled in fetch_traits()
ddg_reg_map = {
    'tw-tzh': 'zh_TW',
    'hk-tzh': 'zh_HK',
    'ct-ca': 'skip',  # ct-ca and es-ca both map to ca_ES
    'es-ca': 'ca_ES',
    'id-en': 'id_ID',
    'no-no': 'nb_NO',
    'jp-jp': 'ja_JP',
    'kr-kr': 'ko_KR',
    'xa-ar': 'ar_SA',
    'sl-sl': 'sl_SI',
    'th-en': 'th_TH',
    'vn-en': 'vi_VN',
}

# special handling of DDG language codes in fetch_traits():
# 'lang_region' entries go into engine_traits.custom['lang_region'],
# 'skip' entries are ignored
ddg_lang_map = {
    # use ar --> ar_EG (Egypt's arabic)
    "ar_DZ": 'lang_region',
    "ar_JO": 'lang_region',
    "ar_SA": 'lang_region',
    # use bn --> bn_BD
    'bn_IN': 'lang_region',
    # use de --> de_DE
    'de_CH': 'lang_region',
    # use en --> en_US,
    'en_AU': 'lang_region',
    'en_CA': 'lang_region',
    'en_GB': 'lang_region',
    # Esperanto
    'eo_XX': 'eo',
    # use es --> es_ES,
    'es_AR': 'lang_region',
    'es_CL': 'lang_region',
    'es_CO': 'lang_region',
    'es_CR': 'lang_region',
    'es_EC': 'lang_region',
    'es_MX': 'lang_region',
    'es_PE': 'lang_region',
    'es_UY': 'lang_region',
    'es_VE': 'lang_region',
    # use fr --> rf_FR
    'fr_CA': 'lang_region',
    'fr_CH': 'lang_region',
    'fr_BE': 'lang_region',
    # use nl --> nl_NL
    'nl_BE': 'lang_region',
    # use pt --> pt_PT
    'pt_BR': 'lang_region',
    # skip these languages
    'od_IN': 'skip',
    'io_XX': 'skip',
    'tokipona_XX': 'skip',
}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the POST request for DDG-lite.

    The API is not documented; the form fields, offsets and cookies below
    emulate what https://lite.duckduckgo.com/lite/ sends when the "next page"
    link is pressed again and again.
    """

    # quote ddg bangs
    query_parts = []
    # for val in re.split(r'(\s+)', query):
    for val in re.split(r'(\s+)', query):
        if not val.strip():
            continue
        # quoting a known external bang keeps DDG from redirecting the search
        if val.startswith('!') and external_bang.get_node(external_bang.EXTERNAL_BANGS, val[1:]):
            val = f"'{val}'"
        query_parts.append(val)
    query = ' '.join(query_parts)

    eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
    # eng_lang = get_ddg_lang(traits, params['searxng_locale'])

    params['url'] = url
    params['method'] = 'POST'
    params['data']['q'] = query

    # The API is not documented, so we do some reverse engineering and emulate
    # what https://lite.duckduckgo.com/lite/ does when you press "next Page"
    # link again and again ..

    params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
    params['headers']['Referer'] = 'https://google.com/'

    # initial page does not have an offset
    if params['pageno'] == 2:
        # second page does have an offset of 30
        offset = (params['pageno'] - 1) * 30
        params['data']['s'] = offset
        params['data']['dc'] = offset + 1

    elif params['pageno'] > 2:
        # third and following pages do have an offset of 30 + n*50
        offset = 30 + (params['pageno'] - 2) * 50
        params['data']['s'] = offset
        params['data']['dc'] = offset + 1

    # request needs a vqd argument
    params['data']['vqd'] = get_vqd(query, params["headers"])

    # initial page does not have additional data in the input form
    if params['pageno'] > 1:

        # values are taken from the hidden form of the previous response
        # (module-level form_data, refreshed in response())
        params['data']['o'] = form_data.get('o', 'json')
        params['data']['api'] = form_data.get('api', 'd.js')
        params['data']['nextParams'] = form_data.get('nextParams', '')
        params['data']['v'] = form_data.get('v', 'l')

    params['data']['kl'] = eng_region
    params['cookies']['kl'] = eng_region

    params['data']['df'] = ''
    if params['time_range'] in time_range_dict:
        params['data']['df'] = time_range_dict[params['time_range']]
        params['cookies']['df'] = time_range_dict[params['time_range']]

    logger.debug("param data: %s", params['data'])
    logger.debug("param cookies: %s", params['cookies'])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the DDG-lite HTML tables into SearXNG results."""

    # a 303 redirect carries no result page
    if resp.status_code == 303:
        return []

    results = []
    doc = lxml.html.fromstring(resp.text)

    result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')

    if len(result_table) == 2:
        # some locales (at least China) does not have a "next page" button and
        # the layout of the HTML tables is different.
        result_table = result_table[1]
    elif not len(result_table) >= 3:
        # no more results
        return []
    else:
        result_table = result_table[2]
        # update form data from response
        form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
        if len(form):

            form = form[0]
            # remember the hidden-form values for the next paging request
            form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
            form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
            form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
            logger.debug('form_data: %s', form_data)

            value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
            query = resp.search_params['data']['q']
            cache_vqd(query, value)

    tr_rows = eval_xpath(result_table, './/tr')
    # In the last <tr> is the form of the 'previous/next page' links
    tr_rows = tr_rows[:-1]

    len_tr_rows = len(tr_rows)
    offset = 0

    # each result occupies four consecutive <tr> rows; the first two carry
    # title and snippet
    while len_tr_rows >= offset + 4:

        # assemble table rows we need to scrap
        tr_title = tr_rows[offset]
        tr_content = tr_rows[offset + 1]
        offset += 4

        # ignore sponsored Adds <tr class="result-sponsored">
        if tr_content.get('class') == 'result-sponsored':
            continue

        a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
        if a_tag is None:
            continue

        td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
        if td_content is None:
            continue

        results.append(
            {
                'title': a_tag.text_content(),
                'content': extract_text(td_content),
                'url': a_tag.get('href'),
            }
        )

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages & regions from DuckDuckGo.

    SearXNG's ``all`` locale maps DuckDuckGo's "Alle regions" (``wt-wt``).
    DuckDuckGo's language "Browsers prefered language" (``wt_WT``) makes no
    sense in a SearXNG request since SearXNG's ``all`` will not add a
    ``Accept-Language`` HTTP header. The value in ``engine_traits.all_locale``
    is ``wt-wt`` (the region).

    Beside regions DuckDuckGo also defines its lanaguages by region codes. By
    example these are the english languages in DuckDuckGo:

    - en_US
    - en_AU
    - en_CA
    - en_GB

    The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
    SearXNG's locale.

    """
    # pylint: disable=too-many-branches, too-many-statements
    # fetch regions

    engine_traits.all_locale = 'wt-wt'

    # updated from u588 to u661 / should be updated automatically?
    resp = get('https://duckduckgo.com/util/u661.js')

    if not resp.ok:  # type: ignore
        print("ERROR: response from DuckDuckGo is not OK.")

    # cut the ``regions:{...}`` JS object out of the utility script and parse
    # it as JSON
    pos = resp.text.find('regions:{') + 8  # type: ignore
    js_code = resp.text[pos:]  # type: ignore
    pos = js_code.find('}') + 1
    regions = json.loads(js_code[:pos])

    for eng_tag, name in regions.items():

        if eng_tag == 'wt-wt':
            engine_traits.all_locale = 'wt-wt'
            continue

        region = ddg_reg_map.get(eng_tag)
        if region == 'skip':
            continue

        if not region:
            # generic case: "<territory>-<lang>" --> "<lang>_<TERRITORY>"
            eng_territory, eng_lang = eng_tag.split('-')
            region = eng_lang + '_' + eng_territory.upper()

        try:
            sxng_tag = locales.region_tag(babel.Locale.parse(region))
        except babel.UnknownLocaleError:
            print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
            continue

        conflict = engine_traits.regions.get(sxng_tag)
        if conflict:
            if conflict != eng_tag:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
            continue
        engine_traits.regions[sxng_tag] = eng_tag

    # fetch languages

    engine_traits.custom['lang_region'] = {}

    # cut the ``languages:{...}`` JS object out of the same script; its keys
    # are bare identifiers, so quote them before parsing as JSON
    pos = resp.text.find('languages:{') + 10  # type: ignore
    js_code = resp.text[pos:]  # type: ignore
    pos = js_code.find('}') + 1
    js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
    languages = json.loads(js_code)

    for eng_lang, name in languages.items():

        if eng_lang == 'wt_WT':
            continue

        babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
        if babel_tag == 'skip':
            continue

        try:

            if babel_tag == 'lang_region':
                sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
                engine_traits.custom['lang_region'][sxng_tag] = eng_lang
                continue

            sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))

        except babel.UnknownLocaleError:
            print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
            continue

        conflict = engine_traits.languages.get(sxng_tag)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_tag] = eng_lang
|
|
||||||
@@ -1,255 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
DuckDuckGo Instant Answer API
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented but from
|
|
||||||
reverse engineering we can see that some services (e.g. instant answers) still
|
|
||||||
in use from the DDG search engine.
|
|
||||||
|
|
||||||
As far we can say the *instant answers* API does not support languages, or at
|
|
||||||
least we could not find out how language support should work. It seems that
|
|
||||||
most of the features are based on English terms.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
from urllib.parse import urlencode, urlparse, urljoin
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
from searx.data import WIKIDATA_UNITS
|
|
||||||
from searx.utils import extract_text, html_to_text, get_string_replaces_function
|
|
||||||
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
# about
|
|
||||||
# about — engine metadata
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

send_accept_language_header = True

# instant-answer API endpoint
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

# URL prefixes recognised as wikidata entity links
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']

# helper that rewrites http: URLs to https:
replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
|
|
||||||
|
|
||||||
|
|
||||||
def is_broken_text(text):
    """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``

    The href URL is broken, the "Related website" may contains some HTML.

    The best solution seems to ignore these results.
    """
    looks_like_url = text.startswith('http')
    contains_space = ' ' in text
    return looks_like_url and contains_space
|
|
||||||
|
|
||||||
|
|
||||||
def result_to_text(text, htmlResult):
    """Extract a plain-text suggestion from a DDG related-topic result.

    Prefer the text of the first ``<a>`` element found in *htmlResult*; fall
    back to *text* when no anchor is present.  Returns ``None`` for broken
    entries (see :py:func:`is_broken_text`).
    """
    # TODO : remove result ending with "Meaning" or "Category" # pylint: disable=fixme
    anchors = html.fromstring(htmlResult).xpath('//a')
    candidate = extract_text(anchors[0]) if anchors else text
    return None if is_broken_text(candidate) else candidate
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Assemble the instant-answers API request URL for *query*."""
    encoded = urlencode({'q': query})
    params['url'] = URL.format(query=encoded)
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the instant-answers JSON into answers, suggestions, url results
    and (when a heading is present) an infobox.

    The function accumulates ``content``, ``attributes``, ``urls`` and
    ``relatedTopics`` from several optional payload fields and finally decides
    between a plain url result and a full infobox.
    """
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    results = []

    search_res = resp.json()

    # search_res.get('Entity') possible values (not exhaustive) :
    # * continent / country / department / location / waterfall
    # * actor / musician / artist
    # * book / performing art / film / television / media franchise / concert tour / playwright
    # * prepared food
    # * website / software / os / programming language / file format / software engineer
    # * company

    content = ''
    heading = search_res.get('Heading', '')
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer:
        logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer)
        # 'calc' and 'ip' answer types are skipped (handled elsewhere or not useful)
        if search_res.get('AnswerType') not in ['calc', 'ip']:
            results.append({'answer': html_to_text(answer)})

    # add infobox
    if 'Definition' in search_res:
        content = content + search_res.get('Definition', '')

    if 'Abstract' in search_res:
        content = content + search_res.get('Abstract', '')

    # image: DDG may return a relative path; make it absolute
    image = search_res.get('Image')
    image = None if image == '' else image
    if image is not None and urlparse(image).netloc == '':
        image = urljoin('https://duckduckgo.com', image)

    # urls
    # Official website, Wikipedia page
    for ddg_result in search_res.get('Results', []):
        firstURL = ddg_result.get('FirstURL')
        text = ddg_result.get('Text')
        if firstURL is not None and text is not None:
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics: either flat entries ('FirstURL') or named groups ('Topics')
    for ddg_result in search_res.get('RelatedTopics', []):
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL')
            text = ddg_result.get('Text')
            if not is_broken_text(text):
                suggestion = result_to_text(text, ddg_result.get('Result'))
                # don't suggest the heading itself back to the user
                if suggestion != heading and suggestion is not None:
                    results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            suggestions = []
            # the list is appended now and filled in-place below
            relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics', []):
                suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result'))
                if suggestion != heading and suggestion is not None:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL', '')
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True})
        results.append({'url': abstractURL, 'title': heading})

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        # NOTE: overrides the abstract-based id when both are present
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # attributes
    # some will be converted to urls
    if 'Infobox' in search_res:
        infobox = search_res.get('Infobox')
        if 'content' in infobox:
            osm_zoom = 17
            coordinates = None
            for info in infobox.get('content'):
                data_type = info.get('data_type')
                data_label = info.get('label')
                data_value = info.get('value')

                # Workaround: ddg may return a double quote
                if data_value == '""':
                    continue

                # Is it an external URL ?
                # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                # * netflix_id
                external_url = get_external_url(data_type, data_value)
                if external_url is not None:
                    urls.append({'title': data_label, 'url': external_url})
                elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
                    # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                    # ignore wiki_maps_trigger: reference to a javascript
                    # ignore google_play_artist_id: service shutdown
                    pass
                elif data_type == 'string' and data_label == 'Website':
                    # There is already an URL for the website
                    pass
                elif data_type == 'area':
                    attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
                    # the area determines the OSM zoom used for the coordinates link
                    osm_zoom = area_to_osm_zoom(data_value.get('amount'))
                elif data_type == 'coordinates':
                    if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
                        # coordinate on Earth
                        # get the zoom information from the area
                        coordinates = info
                    else:
                        # coordinate NOT on Earth
                        attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
                elif data_type == 'string':
                    attributes.append({'label': data_label, 'value': data_value})

            if coordinates:
                data_label = coordinates.get('label')
                data_value = coordinates.get('value')
                latitude = data_value.get('latitude')
                longitude = data_value.get('longitude')
                url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
                urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme
        # with only a single url and nothing else, a plain result is preferred
        if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
            results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
        else:
            results.append(
                {
                    'infobox': heading,
                    'id': infobox_id,
                    'content': content,
                    'img_src': image,
                    'attributes': attributes,
                    'urls': urls,
                    'relatedTopics': relatedTopics,
                }
            )

    return results
|
|
||||||
|
|
||||||
|
|
||||||
def unit_to_str(unit):
    """Map a Wikidata entity URL to a human-readable unit symbol.

    Values that are not Wikidata entity URLs — and entities missing from
    ``WIKIDATA_UNITS`` — are passed through unchanged.
    """
    matching_prefix = next((p for p in WIKIDATA_PREFIX if unit.startswith(p)), None)
    if matching_prefix is None:
        return unit
    entity_id = unit[len(matching_prefix):]
    return WIKIDATA_UNITS.get(entity_id, unit)
|
|
||||||
|
|
||||||
|
|
||||||
def area_to_str(area):
    """parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``

    Returns an ``"<amount> <unit>"`` string; when the amount cannot be parsed
    as a number, falls back to the raw ``amount``/``unit`` values.
    """
    unit = unit_to_str(area.get('unit'))
    if unit is not None:
        try:
            amount = float(area.get('amount'))
            return '{} {}'.format(amount, unit)
        # TypeError: 'amount' key is missing (float(None));
        # ValueError: present but not numeric.  Previously only ValueError
        # was caught, so a missing amount crashed the whole response parser.
        except (TypeError, ValueError):
            pass
    return '{} {}'.format(area.get('amount', ''), area.get('unit', ''))
|
|
||||||
@@ -1,100 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
"""
|
|
||||||
DuckDuckGo Images
|
|
||||||
~~~~~~~~~~~~~~~~~
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
from urllib.parse import urlencode
|
|
||||||
|
|
||||||
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
|
|
||||||
from searx.engines.duckduckgo import (
|
|
||||||
get_ddg_lang,
|
|
||||||
get_vqd,
|
|
||||||
)
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON (site requires js to get images)',
}

# engine dependent config
categories = ['images', 'web']
paging = True
safesearch = True
send_accept_language_header = True

# cookie / URL-argument values for the SearXNG safe-search levels
# (0: off, 1: moderate, 2: strict); ``None`` means "do not send the value"
safesearch_cookies = {0: '-2', 1: None, 2: '1'}
safesearch_args = {0: '1', 1: None, 2: '1'}
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Build the ``i.js`` image request.

    The locale is passed twice: as URL arguments and as DDG cookies; the
    ``vqd`` token must be fetched beforehand from the regular search page.
    """
    locale = params['searxng_locale']
    region = traits.get_region(locale, traits.all_locale)
    lang = get_ddg_lang(traits, locale)

    query_args = {
        'q': query,
        'o': 'json',
        # 'u': 'bing',
        'l': region,
        'vqd': get_vqd(query, params["headers"]),
    }

    page = params['pageno']
    if page > 1:
        query_args['s'] = (page - 1) * 100

    cookies = params['cookies']
    cookies['ad'] = lang  # zh_CN
    cookies['ah'] = region  # "us-en,de-de"
    cookies['l'] = region  # "hk-tzh"
    logger.debug("cookies: %s", cookies)

    level = params['safesearch']
    cookie_value = safesearch_cookies.get(level)
    if cookie_value is not None:
        cookies['p'] = cookie_value  # "-2", "1"
    arg_value = safesearch_args.get(level)
    if arg_value is not None:
        query_args['p'] = arg_value  # "-1", "1"

    params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=urlencode(query_args), f=',,,,,')

    headers = params['headers']
    headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'
    headers['Referer'] = 'https://duckduckgo.com/'
    headers['X-Requested-With'] = 'XMLHttpRequest'
    logger.debug("headers: %s", headers)

    return params
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Map the JSON payload of ``i.js`` onto SearXNG image results."""
    payload = resp.json()
    return [
        {
            'template': 'images.html',
            'title': item['title'],
            'content': '',
            'thumbnail_src': item['thumbnail'],
            'img_src': item['image'],
            'url': item['url'],
            'img_format': '%s x %s' % (item['width'], item['height']),
            'source': item['source'],
        }
        for item in payload['results']
    ]
|
|
||||||
@@ -1,163 +0,0 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
||||||
# lint: pylint
|
|
||||||
"""
|
|
||||||
DuckDuckGo Weather
|
|
||||||
~~~~~~~~~~~~~~~~~~
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
from json import loads
|
|
||||||
from urllib.parse import quote
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from flask_babel import gettext
|
|
||||||
|
|
||||||
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
|
|
||||||
from searx.engines.duckduckgo import get_ddg_lang
|
|
||||||
from searx.enginelib.traits import EngineTraits
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
import logging
|
|
||||||
|
|
||||||
logger: logging.Logger
|
|
||||||
|
|
||||||
traits: EngineTraits
|
|
||||||
|
|
||||||
|
|
||||||
# about
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}

# forward the client's Accept-Language header to DDG
send_accept_language_header = True

# engine dependent config
categories = ["weather"]
# forecast "spice" endpoint; '{lang}' is the two-letter language code
URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
|
|
||||||
|
|
||||||
|
|
||||||
def generate_condition_table(condition):
    """Render one weather data point as HTML table rows.

    *condition* is a DDG forecast data point (``currently`` or an ``hourly``
    item); temperatures come in °F and wind speed in mph, converted here to
    °C / km/h for display alongside the original values.
    """
    res = ""

    res += f"<tr><td><b>{gettext('Condition')}</b></td>" f"<td><b>{condition['summary']}</b></td></tr>"

    res += (
        f"<tr><td><b>{gettext('Temperature')}</b></td>"
        f"<td><b>{f_to_c(condition['temperature'])}°C / {condition['temperature']}°F</b></td></tr>"
    )

    res += (
        f"<tr><td>{gettext('Feels like')}</td><td>{f_to_c(condition['apparentTemperature'])}°C / "
        f"{condition['apparentTemperature']}°F</td></tr>"
    )

    res += (
        f"<tr><td>{gettext('Wind')}</td><td>{condition['windBearing']}° — "
        f"{(condition['windSpeed'] * 1.6093440006147):.2f} km/h / {condition['windSpeed']} mph</td></tr>"
    )

    # fix: this row previously lacked its closing </tr> tag
    res += f"<tr><td>{gettext('Visibility')}</td><td>{condition['visibility']} km</td></tr>"

    res += f"<tr><td>{gettext('Humidity')}</td><td>{(condition['humidity'] * 100):.1f}%</td></tr>"

    return res
|
|
||||||
|
|
||||||
|
|
||||||
def generate_day_table(day):
    """Render the per-day summary rows: min/max temperature, UV index and
    sunrise/sunset times (timestamps formatted as local ``HH:MM``)."""
    sunrise = datetime.fromtimestamp(day['sunriseTime']).strftime('%H:%M')
    sunset = datetime.fromtimestamp(day['sunsetTime']).strftime('%H:%M')

    rows = [
        f"<tr><td>{gettext('Min temp.')}</td><td>{f_to_c(day['temperatureLow'])}°C / "
        f"{day['temperatureLow']}°F</td></tr>",
        f"<tr><td>{gettext('Max temp.')}</td><td>{f_to_c(day['temperatureHigh'])}°C / "
        f"{day['temperatureHigh']}°F</td></tr>",
        f"<tr><td>{gettext('UV index')}</td><td>{day['uvIndex']}</td></tr>",
        f"<tr><td>{gettext('Sunrise')}</td><td>{sunrise}</td></tr>",
        f"<tr><td>{gettext('Sunset')}</td><td>{sunset}</td></tr>",
    ]
    return "".join(rows)
|
|
||||||
|
|
||||||
|
|
||||||
def request(query, params):
    """Set the DDG locale cookies and build the forecast spice URL."""
    locale = params['searxng_locale']
    region = traits.get_region(locale, traits.all_locale)
    lang = get_ddg_lang(traits, locale)

    # !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
    params['cookies'].update({'ad': lang, 'ah': region, 'l': region})
    logger.debug("cookies: %s", params['cookies'])

    # the URL only takes the language part of e.g. 'es_AR'
    params["url"] = URL.format(query=quote(query), lang=lang.split('_')[0])
    return params
|
|
||||||
|
|
||||||
|
|
||||||
def f_to_c(temperature):
    """Convert a Fahrenheit temperature to Celsius, formatted to 2 decimals."""
    celsius = (temperature - 32) / 1.8
    return "%.2f" % celsius
|
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
    """Parse the JSONP forecast payload into a single infobox result.

    The payload is a ``ddg_spice_forecast(...)`` JavaScript call; the JSON
    body is sliced out of the wrapper by line position.  The infobox starts
    with the current conditions, then one table per day interleaving the
    daily summary with the hourly conditions of that day.
    """
    results = []

    # empty callback --> DDG has no forecast for this query
    if resp.text.strip() == "ddg_spice_forecast();":
        return []

    # strip the JSONP wrapper: drop the first line and the trailing ");"
    result = loads(resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2])

    current = result["currently"]

    title = result['flags']['ddg-location']

    infobox = f"<h3>{gettext('Current condition')}</h3><table><tbody>"

    infobox += generate_condition_table(current)

    infobox += "</tbody></table>"

    # tracks the day of the previous hourly entry so a new per-day section
    # (heading + daily summary table) is opened whenever the date changes
    last_date = None

    for time in result['hourly']['data']:
        current_time = datetime.fromtimestamp(time['time'])

        if last_date != current_time.date():
            if last_date is not None:
                infobox += "</tbody></table>"

            infobox += f"<h3>{current_time.strftime('%Y-%m-%d')}</h3>"

            infobox += "<table><tbody>"

            # find the matching entry of the daily forecast for this date
            for day in result['daily']['data']:
                if datetime.fromtimestamp(day['time']).date() == current_time.date():
                    infobox += generate_day_table(day)

            infobox += "</tbody></table><table><tbody>"

        last_date = current_time.date()

        # one hourly row spanning the 7 rows produced by the condition table
        infobox += f"<tr><td rowspan=\"7\"><b>{current_time.strftime('%H:%M')}</b></td></tr>"

        infobox += generate_condition_table(time)

    infobox += "</tbody></table>"

    results.append(
        {
            "infobox": title,
            "content": infobox,
        }
    )

    return results
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user