first commit
+16
@@ -0,0 +1,16 @@
# ignore all files and directories
*
# allow git to descend into directories so whitelisted files inside them are found
!*/
venv/
# keep essential project files
!.gitignore
!.gitattributes

# allow YAMLs, shell scripts, and others
!*.yml
!*.yaml
!*.sh
!*.py
!*.Dockerfile
!Dockerfile
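# A quick way to sanity-check this whitelist (file names below are just examples):
#   git check-ignore -v venv/lib.py notes.txt deploy.yml
# venv/lib.py and notes.txt should be reported as ignored; deploy.yml should not.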
@@ -0,0 +1,51 @@
import datetime, re, sys


# Read the log file and check how recently the logged run time occurred
def check_run_time(log_file_path, delta):
    try:
        # Set timezone info
        timezone_offset = +10.0  # Australian Eastern Standard Time (UTC+10:00)
        tzinfo = datetime.timezone(datetime.timedelta(hours=timezone_offset))

        # delta: number of minutes of allowable difference between the last
        # logged run and the current time
        #delta = 10

        # Read the log file
        with open(log_file_path, 'r') as file:
            lines = file.readlines()

        # Extract the run time line
        run_time_line = next(line for line in lines if "Run time" in line)

        # Parse the run time from the line
        run_time_str = re.search(r'Run time: (.+)', run_time_line).group(1)

        # Convert run time string to datetime object
        run_time = datetime.datetime.strptime(run_time_str, "%a %b %d at %H:%M")

        # Update the run time to the current year since the log doesn't contain the year
        run_time = run_time.replace(tzinfo=tzinfo, year=datetime.datetime.now(tzinfo).year)

        # Get the current time
        current_time = datetime.datetime.now(tzinfo)

        # Calculate the time difference
        time_difference = current_time - run_time

        # Check if the run time is within the last `delta` minutes
        if time_difference <= datetime.timedelta(minutes=delta):
            return "OK"
        else:
            return "FAIL"

    except Exception as e:
        return f"Error: {str(e)}"


# Path to the log file and allowed age in minutes, from the command line
log_file_path = sys.argv[1]
delta = int(sys.argv[2])

# Check the run time and print the result
status = check_run_time(log_file_path, delta)
print(status)
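# Example invocation (the log path is whatever the monitored job writes to); the
# script expects the log to contain a line such as "Run time: Mon Jan 15 at 10:30":
#   python3 check_run_time.py /var/log/backup.log 10
# Prints "OK" if that timestamp is within the last 10 minutes, otherwise "FAIL".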
@@ -0,0 +1,18 @@
version: "3"

# More info at https://github.com/pi-hole/docker-pi-hole/ and https://docs.pi-hole.net/
services:
  server:
    container_name: dns-proxy
    # hostname: pihole.lan.ddnsgeek.com
    image: gists/dnscrypt-proxy
    environment:
      LOCAL_PORT: 5353
    networks:
      - traefik_default
    ports:
      - 5353:5353
networks:
  traefik_default:
    external: true
@@ -0,0 +1,46 @@
---
services:
  app:
    depends_on:
      postgres:
        condition: service_healthy
    image: docuseal/docuseal:latest
    ports:
      - 3000:3000
    volumes:
      - ./data:/data
    environment:
      # - FORCE_SSL=${HOST}
      - DATABASE_URL=postgresql://postgres:tUUczQzCGy2pEWGawCUfhjihDkFwvwVNMs@postgres:5432/docuseal
    labels:
      - "traefik.http.routers.docuseal.rule=Host(`docuseal.lan.ddnsgeek.com`)"
      - "traefik.enable=true"
      - "traefik.http.routers.docuseal.entrypoints=websecure"
      - "traefik.http.routers.docuseal.tls.certresolver=myresolver"
      - "io.portainer.accesscontrol.public"
      - "traefik.http.routers.docuseal.middlewares=error-pages-middleware"
    networks:
      - traefik_default
    restart: always

  postgres:
    image: postgres:15
    volumes:
      - './data/database:/var/lib/postgresql/data'
    environment:
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: tUUczQzCGy2pEWGawCUfhjihDkFwvwVNMs
      POSTGRES_DB: docuseal
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 5s
      retries: 5
    networks:
      - traefik_default
    restart: always

networks:
  traefik_default:
    external: true
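# Quick smoke test once the stack is up (host name assumes the Traefik DNS entry exists):
#   curl -kI https://docuseal.lan.ddnsgeek.com
# A successful response indicates Traefik is routing to the DocuSeal container.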
@@ -0,0 +1,39 @@
version: "3"
services:
  server:
    image: goofball222/dns-over-https # satishweb/doh-server:latest
    # hostname: doh-server
    networks:
      - traefik_default
    # environment:
      # Enable the line below to see more logs
      # DEBUG: "1"
      # UPSTREAM_DNS_SERVER: "udp:pihole:53"
      # DOH_HTTP_PREFIX: "/dns-query"
      # DOH_SERVER_LISTEN: ":8053"
      # DOH_SERVER_TIMEOUT: "10"
      # DOH_SERVER_TRIES: "3"
      # DOH_SERVER_VERBOSE: "true"
    # volumes:
    #   - ./server:/server
    #   - ./data/app-config:/app-config
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.doh-server.rule=Host(`dns.lan.ddnsgeek.com`)"
      - "traefik.http.routers.doh-server.entrypoints=websecure"
      - "traefik.http.services.doh-server.loadbalancer.server.port=8053"
      - "traefik.http.routers.doh-server.middlewares=error-pages-middleware"
      # - "traefik.http.middlewares.mw-doh-compression.compress=true"
      - "traefik.http.routers.doh-server.tls=true"
      # - "traefik.http.middlewares.mw-doh-tls.headers.sslredirect=true"
      # - "traefik.http.middlewares.mw-doh-tls.headers.sslforcehost=true"
      - "traefik.http.routers.doh-server.tls.certresolver=myresolver"
      # - "traefik.http.routers.doh-server.tls.domains[0].main=dns.lan.ddnsgeek.com"
      # - "traefik.http.routers.doh-server.tls.domains[0].sans=dns.lan.ddnsgeek.com"
      # Protection from request floods
      # - "traefik.http.middlewares.mw-doh-ratelimit.ratelimit.average=100"
      # - "traefik.http.middlewares.mw-doh-ratelimit.ratelimit.burst=50"
      # - "traefik.http.middlewares.mw-doh-ratelimit.ratelimit.period=10s"
networks:
  traefik_default:
    external: true
@@ -0,0 +1,34 @@
#version: '3.8'
services:
  gitea:
    image: gitea/gitea:latest
    # container_name: gitea
    restart: always
    environment:
      - USER_UID=1000
      - USER_GID=1000
      - GITEA__database__DB_TYPE=sqlite3
      - GITEA__server__ROOT_URL=https://gitea.lan.ddnsgeek.com/
    volumes:
      - ./data:/data
    networks:
      # - proxy
      - traefik_default

    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=proxy"
      - "traefik.http.routers.gitea.rule=Host(`gitea.lan.ddnsgeek.com`)"
      - "traefik.http.routers.gitea.entrypoints=websecure"
      - "traefik.http.routers.gitea.tls=true"
      - "traefik.http.routers.gitea.tls.certresolver=myresolver"
      - "traefik.http.services.gitea.loadbalancer.server.port=3000"
      - "io.portainer.accesscontrol.public"

#volumes:
#  gitea_data:

networks:
  traefik_default:
    external: true
@@ -0,0 +1,12 @@
FROM nextcloud:latest

#RUN groupadd -r doods && useradd -m -s /bin/bash -d /opt/doods -g doods doods
#RUN chsh -s /usr/sbin/nologin root

#RUN chown -R doods:doods /opt/doods

#ENV PATH "${PATH}:/opt/doods"

#ENV HOME /opt/doods

USER www-data
@@ -0,0 +1,144 @@
version: "3"
services:
  webapp:
    image: nextcloud
    deploy:
      # resources:
      #   limits:
      #     cpus: '0.3'
      #     memory: 200m
      restart_policy:
        condition: on-failure
        max_attempts: 5
    # read_only: true
    # tmpfs:
    #   - /tmp
    #   - /var
    #   - /run
    restart: always
    hostname: nextcloud.lan.ddnsgeek.com
    volumes:
      - ./data:/var/www/html/data:rw
      - ./config:/var/www/html/config:rw
    depends_on:
      - database
      - redis
    # ports:
    #   - 8083:80
    #   - 4433:443
    environment:
      - MYSQL_PASSWORD=R1m@dmin
      - MYSQL_DATABASE=nextcloud
      - MYSQL_USER=nextcloud
      - MYSQL_HOST=nextcloud_db:3306
      - NEXTCLOUD_TRUSTED_DOMAINS=nextcloud.lan.ddnsgeek.com
      - OVERWRITEPROTOCOL=https
      - OVERWRITECLIURL=https://nextcloud.lan.ddnsgeek.com

      - SMTP_HOST=smtp-mail.outlook.com
      - SMTP_SECURE=tls
      - SMTP_PORT=587
      - SMTP_AUTHTYPE=login
      - MAIL_FROM_ADDRESS=wayne.bennett@live.com
      - MAIL_DOMAIN=live.com
      - SMTP_NAME=wayne.bennett
      - SMTP_PASSWORD=uscdbrjunqmkgglf

      - REDIS_HOST=redis
      # - REDIS_HOST_PASSWORD=R1m@dmin
    networks:
      - traefik_default
    labels:
      - "traefik.http.routers.nextcloud.rule=Host(`nextcloud.lan.ddnsgeek.com`)"
      - "traefik.enable=true"
      - "traefik.http.routers.nextcloud.entrypoints=websecure"
      - "traefik.http.routers.nextcloud.tls.certresolver=myresolver"
      - "io.portainer.accesscontrol.public"
      - "traefik.http.routers.nextcloud.middlewares=error-pages-middleware, nextcloud-dav, secHeaders@file, nextcloud-webfinger"
      - "traefik.http.middlewares.nextcloud-dav.replacepathregex.regex=^/.well-known/ca(l|rd)dav"
      - "traefik.http.middlewares.nextcloud-dav.replacepathregex.replacement=/remote.php/dav/"
      - "traefik.http.middlewares.nextcloud-nodeinfo.replacepathregex.regex=^/.well-known/nodeinfo"
      - "traefik.http.middlewares.nextcloud-nodeinfo.replacepathregex.replacement=/nextcloud/index.php/.well-known/nodeinfo/"
      - "traefik.http.middlewares.nextcloud-webfinger.redirectregex.permanent=true"
      - "traefik.http.middlewares.nextcloud-webfinger.redirectregex.regex=https://(.*)/.well-known/webfinger"
      - "traefik.http.middlewares.nextcloud-webfinger.redirectregex.replacement=https://$${1}/nextcloud/index.php/.well-known/webfinger"

      # - "traefik.http.middlewares.nextcloudHeader.headers.stsSeconds=15552000"
      # - "traefik.http.middlewares.nextcloudHeader.headers.stsIncludeSubdomains=true"
      # - "traefik.http.middlewares.nextcloudHeader.headers.stsPreload=true"
      # - "traefik.http.middlewares.nextcloudHeader.headers.forceSTSHeader=true"

      # - "traefik.http.routers.nextcloud.middlewares=error-pages-middleware, secHeaders@file, nextcloud_redirectregex, nextcloud-webfinger"
      # - "traefik.http.middlewares.nextcloud_redirectregex.redirectregex.permanent=true"
      # - "traefik.http.middlewares.nextcloud_redirectregex.redirectregex.regex='https://(.*)/.well-known/(?:card|cal)dav'"
      # - "traefik.http.middlewares.nextcloud_redirectregex.redirectregex.replacement='https://$${1}/remote.php/dav'"

  database:
    image: mariadb:11.4
    # image: mariadb
    # read_only: true
    # tmpfs:
    #   - /tmp
    #   - /var
    #   - /run
    #   - /docker-entrypoint-initdb.d
    restart: always
    hostname: nextcloud_db
    command: --transaction-isolation=READ-COMMITTED --log-bin=binlog --binlog-format=ROW
    deploy:
      # resources:
      #   limits:
      #     cpus: '0.3'
      #     memory: 300m
      restart_policy:
        condition: on-failure
        max_attempts: 5
    volumes:
      - ./database:/var/lib/mysql:rw
    environment:
      - MYSQL_ROOT_PASSWORD=R1m@dmin
      - MYSQL_PASSWORD=R1m@dmin
      - MYSQL_DATABASE=nextcloud
      - MYSQL_USER=nextcloud
      - MARIADB_AUTO_UPGRADE=1
      - NEXTCLOUD_ADMIN_USER=admin
      - NEXTCLOUD_ADMIN_PASSWORD=R1m@dmin
    networks:
      - traefik_default
    healthcheck:
      test: "/usr/bin/mysql --user=nextcloud --password=R1m@dmin --execute \"SHOW DATABASES;\""
    labels:
      - "io.portainer.accesscontrol.public"

  redis:
    image: "redis"
    # read_only: true
    # tmpfs:
    #   - /tmp
    #   - /var
    #   - /run
    deploy:
      # resources:
      #   limits:
      #     cpus: '0.3'
      #     memory: 150m
      restart_policy:
        condition: on-failure
        max_attempts: 5
    command: redis-server --save 60 1 --loglevel warning
    environment:
      - REDIS_OVERCOMMIT_MEMORY=1
      - REDIS_ARGS="--requirepass R1m@dmin --user redis on >password ~* allcommands --user default off nopass nocommands"
    hostname: redis
    # user: "linode"
    volumes:
      - ./data/redis:/data:rw
      # - ./config.yaml:/opt/doods/config.yaml
    restart: unless-stopped
    networks:
      - traefik_default
    labels:
      - "io.portainer.accesscontrol.public"

networks:
  traefik_default:
    external: true
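# Once deployed, the CalDAV/CardDAV well-known path should be rewritten by the
# nextcloud-dav middleware above (an unauthenticated request typically gets a 401 from the DAV endpoint):
#   curl -k -o /dev/null -w '%{http_code}\n' https://nextcloud.lan.ddnsgeek.com/.well-known/caldav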
@@ -0,0 +1,18 @@
FROM passbolt/passbolt:latest-ce

RUN groupadd -r passbolt && useradd -m -s /bin/bash -d /media/data -g passbolt passbolt
RUN chsh -s /usr/sbin/nologin root

RUN chown -R passbolt:passbolt /etc/nginx
RUN chown -R passbolt:passbolt /var/lib/nginx
RUN chown -R passbolt:passbolt /run
#COPY nginx.conf /etc/nginx/nginx.conf

#ENV PATH "${PATH}:/opt/doods"

#ENV HOME /media/data

USER passbolt

# ENTRYPOINT ["python3", "main.py"]
# CMD ["api"]
@@ -0,0 +1,64 @@
version: "3.9"
services:
  db:
    image: mariadb:10.11
    restart: unless-stopped
    environment:
      MYSQL_RANDOM_ROOT_PASSWORD: "true"
      MYSQL_DATABASE: "passbolt"
      MYSQL_USER: "passbolt"
      MYSQL_PASSWORD: "P4ssb0lt"
    volumes:
      - ./data/database:/var/lib/mysql
    networks:
      - traefik_default

  webapp:
    image: passbolt/passbolt:latest-ce
    # Alternatively you can use the rootless image:
    # image: passbolt/passbolt:latest-ce-non-root
    restart: unless-stopped
    depends_on:
      - db
    environment:
      APP_FULL_BASE_URL: https://passbolt.lan.ddnsgeek.com
      DATASOURCES_DEFAULT_HOST: "db"
      DATASOURCES_DEFAULT_USERNAME: "passbolt"
      DATASOURCES_DEFAULT_PASSWORD: "P4ssb0lt"
      DATASOURCES_DEFAULT_DATABASE: "passbolt"
    volumes:
      - ./data/gpg:/etc/passbolt/gpg
      - ./data/jwt:/etc/passbolt/jwt
    command:
      [
        "/usr/bin/wait-for.sh",
        "-t",
        "0",
        "db:3306",
        "--",
        "/docker-entrypoint.sh",
      ]
    networks:
      - traefik_default
    labels:
      - "traefik.http.routers.passbolt.rule=Host(`passbolt.lan.ddnsgeek.com`)"
      - "traefik.enable=true"
      - "traefik.http.routers.passbolt.entrypoints=websecure"
      - "traefik.http.routers.passbolt.tls.certresolver=myresolver"
      - "io.portainer.accesscontrol.public"
      - "traefik.http.routers.passbolt.middlewares=error-pages-middleware"

    # ports:
    #   - 8082:80
    #   - 4432:443
    # Alternatively for non-root images:
    #   - 80:8080
    #   - 443:4433

#volumes:
#  database_volume:
#  gpg_volume:
#  jwt_volume:
networks:
  traefik_default:
    external: true
@@ -0,0 +1,44 @@
version: "3"

# More info at https://github.com/pi-hole/docker-pi-hole/ and https://docs.pi-hole.net/
services:
  server:
    container_name: pihole
    hostname: pihole.lan.ddnsgeek.com
    image: pihole/pihole:latest
    # For DHCP it is recommended to remove these ports and instead add: network_mode: "host"
    ports:
      - "53:53/tcp"
      - "53:53/udp"
      # - "67:67/udp" # Only required if you are using Pi-hole as your DHCP server
      # - "80:80/tcp"
    environment:
      TZ: 'Australia/Brisbane'
      WEBPASSWORD: 'R1m@dmin'
      VIRTUAL_HOST: 'pihole.lan.ddnsgeek.com'
    # Volumes store your data between container upgrades
    volumes:
      - './etc-pihole:/etc/pihole'
      - './etc-dnsmasq.d:/etc/dnsmasq.d'
    # https://github.com/pi-hole/docker-pi-hole#note-on-capabilities
    # cap_add:
    #   - NET_ADMIN # Required if you are using Pi-hole as your DHCP server, else not needed
    restart: unless-stopped
    labels:
      - "traefik.http.routers.pihole.rule=Host(`pihole.lan.ddnsgeek.com`)"
      - "traefik.http.routers.pihole.entrypoints=websecure"
      - "traefik.http.routers.pihole.tls=true"
      - "traefik.http.routers.pihole.tls.certresolver=myresolver"
      - "traefik.http.routers.pihole.tls.domains[0].main=pihole.lan.ddnsgeek.com"
      - "traefik.http.routers.pihole.tls.domains[0].sans=pihole.lan.ddnsgeek.com"
      - "traefik.http.services.pihole.loadbalancer.server.port=80"
      - "traefik.enable=true"
      - "traefik.http.routers.pihole.middlewares=error-pages-middleware"

    networks:
      - traefik_default

networks:
  traefik_default:
    external: true
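# Sanity checks after the container starts (host IP and domain below are examples from this setup):
#   dig @192.168.x.x example.com                      # DNS answered by Pi-hole on port 53
#   curl -kI https://pihole.lan.ddnsgeek.com/admin/   # web UI reached through Traefik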
@@ -0,0 +1,6 @@
FROM searxng/searxng:latest

RUN chown -R searxng:searxng /etc

USER searxng:searxng
@@ -0,0 +1,37 @@
version: "3"
services:
  webapp:
    image: searxng/searxng
    restart: always
    read_only: true
    tmpfs:
      - /tmp
      - /var
      - /run
    hostname: searxng.lan.ddnsgeek.com
    networks:
      - traefik_default
    deploy:
      # resources:
      #   limits:
      #     cpus: '0.05'
      #     memory: 100m
      restart_policy:
        condition: on-failure
        max_attempts: 5
    labels:
      - "traefik.http.routers.searxng.rule=Host(`searxng.lan.ddnsgeek.com`)"
      - "traefik.enable=true"
      - "traefik.http.routers.searxng.entrypoints=websecure"
      - "traefik.http.routers.searxng.tls.certresolver=myresolver"
      - "io.portainer.accesscontrol.public"
      - "traefik.http.routers.searxng.middlewares=error-pages-middleware"
      # - "traefik.http.services.searxng.loadbalancer.server.port=8888"
    # ports:
    #   - 8081:8080
    # healthcheck:
    #   test: "curl --fail http://localhost || exit 1"

networks:
  traefik_default:
    external: true
Executable
+178
@@ -0,0 +1,178 @@
#!/bin/sh

help() {
    cat <<EOF
Command line:
  -h  Display this help
  -d  Dry run to update the configuration files.
  -f  Always update the configuration files (existing files are renamed with
      the .old suffix). Without this option, the new configuration files are
      copied with the .new suffix.
Environment variables:
  INSTANCE_NAME settings.yml : general.instance_name
  AUTOCOMPLETE  settings.yml : search.autocomplete
  BASE_URL      settings.yml : server.base_url
  MORTY_URL     settings.yml : result_proxy.url
  MORTY_KEY     settings.yml : result_proxy.key
  BIND_ADDRESS  uwsgi bind to the specified TCP socket using HTTP protocol.
                Default value: ${DEFAULT_BIND_ADDRESS}
Volume:
  /etc/searxng  the docker entry point copies settings.yml and uwsgi.ini in
                this directory (see the -f command line option)

EOF
}

export DEFAULT_BIND_ADDRESS="0.0.0.0:8080"
export BIND_ADDRESS="${BIND_ADDRESS:-${DEFAULT_BIND_ADDRESS}}"

# Parse command line
FORCE_CONF_UPDATE=0
DRY_RUN=0

while getopts "fdh" option
do
    case $option in

        f) FORCE_CONF_UPDATE=1 ;;
        d) DRY_RUN=1 ;;

        h)
            help
            exit 0
            ;;
        *)
            echo "unknown option ${option}"
            exit 42
            ;;
    esac
done

get_searxng_version(){
    su searxng -c \
       'python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)"' \
       2>/dev/null
}

SEARXNG_VERSION="$(get_searxng_version)"
export SEARXNG_VERSION
echo "SearXNG version ${SEARXNG_VERSION}"

# helpers to update the configuration files
patch_uwsgi_settings() {
    CONF="$1"

    # update uwsgi.ini
    sed -i \
        -e "s|workers = .*|workers = ${UWSGI_WORKERS:-%k}|g" \
        -e "s|threads = .*|threads = ${UWSGI_THREADS:-4}|g" \
        "${CONF}"
}

patch_searxng_settings() {
    CONF="$1"

    # Make sure that there is a trailing slash at the end of BASE_URL
    # see https://www.gnu.org/savannah-checkouts/gnu/bash/manual/bash.html#Shell-Parameter-Expansion
    export BASE_URL="${BASE_URL%/}/"

    # update settings.yml
    sed -i \
        -e "s|base_url: false|base_url: ${BASE_URL}|g" \
        -e "s/instance_name: \"SearXNG\"/instance_name: \"${INSTANCE_NAME}\"/g" \
        -e "s/autocomplete: \"\"/autocomplete: \"${AUTOCOMPLETE}\"/g" \
        -e "s/ultrasecretkey/$(openssl rand -hex 32)/g" \
        "${CONF}"

    # Morty configuration

    if [ -n "${MORTY_KEY}" ] && [ -n "${MORTY_URL}" ]; then
        sed -i -e "s/image_proxy: false/image_proxy: true/g" \
            "${CONF}"
        cat >> "${CONF}" <<-EOF

# Morty configuration
result_proxy:
  url: ${MORTY_URL}
  key: !!binary "${MORTY_KEY}"
EOF
    fi
}

update_conf() {
    FORCE_CONF_UPDATE=$1
    CONF="$2"
    NEW_CONF="${2}.new"
    OLD_CONF="${2}.old"
    REF_CONF="$3"
    PATCH_REF_CONF="$4"

    if [ -f "${CONF}" ]; then
        if [ "${REF_CONF}" -nt "${CONF}" ]; then
            # There is a new version
            if [ "$FORCE_CONF_UPDATE" -ne 0 ]; then
                # Replace the current configuration
                printf '⚠️ Automatically update %s to the new version\n' "${CONF}"
                if [ ! -f "${OLD_CONF}" ]; then
                    printf 'The previous configuration is saved to %s\n' "${OLD_CONF}"
                    mv "${CONF}" "${OLD_CONF}"
                fi
                cp "${REF_CONF}" "${CONF}"
                $PATCH_REF_CONF "${CONF}"
            else
                # Keep the current configuration
                printf '⚠️ Check new version %s to make sure SearXNG is working properly\n' "${NEW_CONF}"
                cp "${REF_CONF}" "${NEW_CONF}"
                $PATCH_REF_CONF "${NEW_CONF}"
            fi
        else
            printf 'Use existing %s\n' "${CONF}"
        fi
    else
        printf 'Create %s\n' "${CONF}"
        cp "${REF_CONF}" "${CONF}"
        $PATCH_REF_CONF "${CONF}"
    fi
}

# searx compatibility: copy /etc/searx/* to /etc/searxng/*
SEARX_CONF=0
if [ -f "/etc/searx/settings.yml" ]; then
    if [ ! -f "${SEARXNG_SETTINGS_PATH}" ]; then
        printf '⚠️ /etc/searx/settings.yml is copied to /etc/searxng\n'
        cp "/etc/searx/settings.yml" "${SEARXNG_SETTINGS_PATH}"
    fi
    SEARX_CONF=1
fi
if [ -f "/etc/searx/uwsgi.ini" ]; then
    printf '⚠️ /etc/searx/uwsgi.ini is ignored. Use the volume /etc/searxng\n'
    SEARX_CONF=1
fi
if [ "$SEARX_CONF" -eq "1" ]; then
    printf '⚠️ The deprecated volume /etc/searx is mounted. Please update your configuration to use /etc/searxng ⚠️\n'
    cat << EOF > /etc/searx/deprecated_volume_read_me.txt
This Docker image uses the volume /etc/searxng
Update your configuration:
* remove uwsgi.ini (or very carefully update your existing uwsgi.ini using https://github.com/searxng/searxng/blob/master/dockerfiles/uwsgi.ini )
* mount /etc/searxng instead of /etc/searx
EOF
fi
# end of searx compatibility

# make sure there are uwsgi settings
update_conf "${FORCE_CONF_UPDATE}" "${UWSGI_SETTINGS_PATH}" "/usr/local/searxng/dockerfiles/uwsgi.ini" "patch_uwsgi_settings"

# make sure there are searxng settings
update_conf "${FORCE_CONF_UPDATE}" "${SEARXNG_SETTINGS_PATH}" "/usr/local/searxng/searx/settings.yml" "patch_searxng_settings"

# dry run (to update configuration files, then inspect them)
if [ $DRY_RUN -eq 1 ]; then
    printf 'Dry run\n'
    exit
fi

unset MORTY_KEY

# Start uwsgi
printf 'Listen on %s\n' "${BIND_ADDRESS}"
exec su-exec searxng:searxng uwsgi --master --http-socket "${BIND_ADDRESS}" "${UWSGI_SETTINGS_PATH}"
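# Example (the env vars are those documented in help() above; the bind mount path is illustrative):
#   docker run --rm -e INSTANCE_NAME=my-instance -e BASE_URL=https://searxng.example.org/ \
#       -v ./searxng:/etc/searxng searxng/searxng -f
# runs this entrypoint with -f, forcing settings.yml and uwsgi.ini in the volume to be regenerated.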
@@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
# SPDX-License-Identifier: AGPL-3.0-or-later

import sys, os
from pallets_sphinx_themes import ProjectLink

from searx import get_setting
from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH

# Project --------------------------------------------------------------

project = 'SearXNG'
copyright = 'SearXNG team'
author = 'SearXNG team'
release, version = VERSION_STRING, VERSION_STRING

SEARXNG_URL = get_setting('server.base_url') or 'https://example.org/searxng'
ISSUE_URL = get_setting('brand.issue_url')
DOCS_URL = get_setting('brand.docs_url')
PUBLIC_INSTANCES = get_setting('brand.public_instances')
PRIVACYPOLICY_URL = get_setting('general.privacypolicy_url')
CONTACT_URL = get_setting('general.contact_url')
WIKI_URL = get_setting('brand.wiki_url')

# hint: sphinx.ext.viewcode won't highlight when 'highlight_language' [1] is set
# to string 'none' [2]
#
# [1] https://www.sphinx-doc.org/en/master/usage/extensions/viewcode.html
# [2] https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-highlight_language

highlight_language = 'default'

# General --------------------------------------------------------------

master_doc = "index"
source_suffix = '.rst'
numfig = True

exclude_patterns = ['build-templates/*.rst', 'user/*.md']

import searx.engines
import searx.plugins
import searx.webutils

# import searx.webapp is needed to init the engines & plugins, to init a
# (empty) secret_key is needed.
searx.settings['server']['secret_key'] = ''
import searx.webapp

searx.engines.load_engines(searx.settings['engines'])

jinja_contexts = {
    'searx': {
        'engines': searx.engines.engines,
        'plugins': searx.plugins.plugins,
        'version': {
            'node': os.getenv('NODE_MINIMUM_VERSION')
        },
        'enabled_engine_count': sum(not x.disabled for x in searx.engines.engines.values()),
        'categories': searx.engines.categories,
        'categories_as_tabs': {c: searx.engines.categories[c] for c in searx.settings['categories_as_tabs']},
    },
}
jinja_filters = {
    'group_engines_in_tab': searx.webutils.group_engines_in_tab,
}

# Let the Jinja template in configured_engines.rst access documented_modules
# to automatically link documentation for modules if it exists.
def setup(app):
    ENGINES_DOCNAME = 'user/configured_engines'

    def before_read_docs(app, env, docnames):
        assert ENGINES_DOCNAME in docnames
        docnames.remove(ENGINES_DOCNAME)
        docnames.append(ENGINES_DOCNAME)
        # configured_engines must come last so that sphinx already has
        # discovered the python module documentations

    def source_read(app, docname, source):
        if docname == ENGINES_DOCNAME:
            jinja_contexts['searx']['documented_modules'] = app.env.domains['py'].modules

    app.connect('env-before-read-docs', before_read_docs)
    app.connect('source-read', source_read)

# usage:: lorem :patch:`f373169` ipsum
extlinks = {}

# upstream links
extlinks['wiki'] = ('https://github.com/searxng/searxng/wiki/%s', ' %s')
extlinks['pull'] = ('https://github.com/searxng/searxng/pull/%s', 'PR %s')
extlinks['pull-searx'] = ('https://github.com/searx/searx/pull/%s', 'PR %s')

# links to custom brand
extlinks['origin'] = (GIT_URL + '/blob/' + GIT_BRANCH + '/%s', 'git://%s')
extlinks['patch'] = (GIT_URL + '/commit/%s', '#%s')
extlinks['docs'] = (DOCS_URL + '/%s', 'docs: %s')
extlinks['pypi'] = ('https://pypi.org/project/%s', 'PyPi: %s')
extlinks['man'] = ('https://manpages.debian.org/jump?q=%s', '%s')
#extlinks['role'] = (
#    'https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-%s', '')
extlinks['duref'] = (
    'https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#%s', '%s')
extlinks['durole'] = (
    'https://docutils.sourceforge.io/docs/ref/rst/roles.html#%s', '%s')
extlinks['dudir'] = (
    'https://docutils.sourceforge.io/docs/ref/rst/directives.html#%s', '%s')
extlinks['ctan'] = (
    'https://ctan.org/pkg/%s', 'CTAN: %s')

extensions = [
    'sphinx.ext.imgmath',
    'sphinx.ext.extlinks',
    'sphinx.ext.viewcode',
    "sphinx.ext.autodoc",
    "sphinx.ext.intersphinx",
    "pallets_sphinx_themes",
    "sphinx_issues",  # https://github.com/sloria/sphinx-issues/blob/master/README.rst
    "sphinx_jinja",  # https://github.com/tardyp/sphinx-jinja
    "sphinxcontrib.programoutput",  # https://github.com/NextThought/sphinxcontrib-programoutput
    'linuxdoc.kernel_include',  # Implementation of the 'kernel-include' reST-directive.
    'linuxdoc.rstFlatTable',  # Implementation of the 'flat-table' reST-directive.
    'linuxdoc.kfigure',  # Sphinx extension which implements scalable image handling.
    "sphinx_tabs.tabs",  # https://github.com/djungelorm/sphinx-tabs
    'myst_parser',  # https://www.sphinx-doc.org/en/master/usage/markdown.html
    'notfound.extension',  # https://github.com/readthedocs/sphinx-notfound-page
]

autodoc_default_options = {
    'member-order': 'groupwise',
}

myst_enable_extensions = [
    "replacements", "smartquotes"
]

suppress_warnings = ['myst.domains']

intersphinx_mapping = {
    "python": ("https://docs.python.org/3/", None),
    "babel": ("https://babel.readthedocs.io/en/latest/", None),
    "flask": ("https://flask.palletsprojects.com/", None),
    "flask_babel": ("https://python-babel.github.io/flask-babel/", None),
    # "werkzeug": ("https://werkzeug.palletsprojects.com/", None),
    "jinja": ("https://jinja.palletsprojects.com/", None),
    "linuxdoc": ("https://return42.github.io/linuxdoc/", None),
    "sphinx": ("https://www.sphinx-doc.org/en/master/", None),
    "redis": ('https://redis.readthedocs.io/en/stable/', None),
}

issues_github_path = "searxng/searxng"

# HTML -----------------------------------------------------------------

# https://searxng.github.io/searxng --> '/searxng/'
# https://docs.searxng.org --> '/'
notfound_urls_prefix = '/'

sys.path.append(os.path.abspath('_themes'))
sys.path.insert(0, os.path.abspath("../utils/"))
html_theme_path = ['_themes']
html_theme = "searxng"

# sphinx.ext.imgmath setup
html_math_renderer = 'imgmath'
imgmath_image_format = 'svg'
imgmath_font_size = 14
# sphinx.ext.imgmath setup END

html_show_sphinx = False
html_theme_options = {"index_sidebar_logo": True}
html_context = {"project_links": []}
html_context["project_links"].append(ProjectLink("Source", GIT_URL + '/tree/' + GIT_BRANCH))

if WIKI_URL:
    html_context["project_links"].append(ProjectLink("Wiki", WIKI_URL))
if PUBLIC_INSTANCES:
    html_context["project_links"].append(ProjectLink("Public instances", PUBLIC_INSTANCES))
if ISSUE_URL:
    html_context["project_links"].append(ProjectLink("Issue Tracker", ISSUE_URL))
if PRIVACYPOLICY_URL:
    html_context["project_links"].append(ProjectLink("Privacy Policy", PRIVACYPOLICY_URL))
if CONTACT_URL:
    html_context["project_links"].append(ProjectLink("Contact", CONTACT_URL))

html_sidebars = {
    "**": [
        "globaltoc.html",
        "project.html",
        "relations.html",
        "searchbox.html",
        "sourcelink.html"
    ],
}
singlehtml_sidebars = {"index": ["project.html", "localtoc.html"]}
html_logo = "../src/brand/searxng-wordmark.svg"
html_title = "SearXNG Documentation ({})".format(VERSION_STRING)
html_show_sourcelink = True

# LaTeX ----------------------------------------------------------------

latex_documents = [
    (master_doc, "searxng-{}.tex".format(VERSION_STRING), html_title, author, "manual")
]
@@ -0,0 +1 @@
*.md
@@ -0,0 +1,25 @@

categories = ['general']  # optional


def request(query, params):
    '''pre-request callback
    params<dict>:
      method   : POST/GET
      headers  : {}
      data     : {}  # if method == POST
      url      : ''
      category : 'search category'
      pageno   : 1  # number of the requested page
    '''

    params['url'] = 'https://host/%s' % query

    return params


def response(resp):
    '''post-response callback
    resp: requests response object
    '''
    return [{'url': '', 'title': '', 'content': ''}]
Executable
+106
@@ -0,0 +1,106 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=missing-module-docstring

import sys
import os
from os.path import dirname, abspath

import logging

import searx.unixthreadname
import searx.settings_loader
from searx.settings_defaults import settings_set_defaults


# Debug
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'

# Production
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
LOG_LEVEL_PROD = logging.WARNING

searx_dir = abspath(dirname(__file__))
searx_parent_dir = abspath(dirname(dirname(__file__)))
settings, settings_load_message = searx.settings_loader.load_settings()

if settings is not None:
    settings = settings_set_defaults(settings)

_unset = object()


def get_setting(name, default=_unset):
    """Returns the value to which ``name`` points. If there is no such name in the
    settings and the ``default`` is unset, a :py:obj:`KeyError` is raised.

    """
    value = settings
    for a in name.split('.'):
        if isinstance(value, dict):
            value = value.get(a, _unset)
        else:
            value = _unset

        if value is _unset:
            if default is _unset:
                raise KeyError(name)
            value = default
            break

    return value
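# Example: the name is a dotted path into the settings mapping ('brand.issue_url'
# is one of the keys read by docs/conf.py in this repository):
#   get_setting('brand.issue_url')            # -> value from settings.yml
#   get_setting('no.such.key', default=None)  # -> None instead of raising KeyError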


def is_color_terminal():
    if os.getenv('TERM') in ('dumb', 'unknown'):
        return False
    return sys.stdout.isatty()


def logging_config_debug():
    try:
        import coloredlogs  # pylint: disable=import-outside-toplevel
    except ImportError:
        coloredlogs = None

    log_level = os.environ.get('SEARXNG_DEBUG_LOG_LEVEL', 'DEBUG')
    if coloredlogs and is_color_terminal():
        level_styles = {
            'spam': {'color': 'green', 'faint': True},
            'debug': {},
            'notice': {'color': 'magenta'},
            'success': {'bold': True, 'color': 'green'},
            'info': {'bold': True, 'color': 'cyan'},
            'warning': {'color': 'yellow'},
            'error': {'color': 'red'},
            'critical': {'bold': True, 'color': 'red'},
        }
        field_styles = {
            'asctime': {'color': 'green'},
            'hostname': {'color': 'magenta'},
            'levelname': {'color': 8},
            'name': {'color': 8},
            'programname': {'color': 'cyan'},
            'username': {'color': 'yellow'},
        }
        coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG)
    else:
        logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)


searx_debug = settings['general']['debug']
if searx_debug:
    logging_config_debug()
else:
    logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD)
    logging.root.setLevel(level=LOG_LEVEL_PROD)
    logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD)
logger = logging.getLogger('searx')
logger.info(settings_load_message)

# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
    logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
    logger.info('max_request_timeout=%i second(s)', max_request_timeout)
Executable
+46
@@ -0,0 +1,46 @@
from os import listdir
from os.path import realpath, dirname, join, isdir
from searx.utils import load_module
from collections import defaultdict


answerers_dir = dirname(realpath(__file__))


def load_answerers():
    answerers = []
    for filename in listdir(answerers_dir):
        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
            continue
        module = load_module('answerer.py', join(answerers_dir, filename))
        if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
            exit(2)
        answerers.append(module)
    return answerers


def get_answerers_by_keywords(answerers):
    by_keyword = defaultdict(list)
    for answerer in answerers:
        for keyword in answerer.keywords:
            by_keyword[keyword].append(answerer.answer)
    return by_keyword


def ask(query):
    results = []
    query_parts = list(filter(None, query.query.split()))

    if not query_parts or query_parts[0] not in answerers_by_keywords:
        return results

    for answerer in answerers_by_keywords[query_parts[0]]:
        result = answerer(query)
        if result:
            results.append(result)
    return results


answerers = load_answerers()
answerers_by_keywords = get_answerers_by_keywords(answerers)
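# Minimal usage sketch: the real query object comes from searx.search, but for
# ask() any object with a .query string attribute is enough (stand-in below):
#   class _Q: query = 'random uuid'
#   ask(_Q())   # -> e.g. [[{'answer': '0b3a...-...'}]]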
Executable
+70
@@ -0,0 +1,70 @@
import hashlib
import random
import string
import uuid
from flask_babel import gettext

# required answerer attribute
# specifies which search query keywords trigger this answerer
keywords = ('random',)

random_int_max = 2**31
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase


def random_characters():
    return [random.choice(random_string_letters) for _ in range(random.randint(8, 32))]


def random_string():
    return ''.join(random_characters())


def random_float():
    return str(random.random())


def random_int():
    return str(random.randint(-random_int_max, random_int_max))


def random_sha256():
    m = hashlib.sha256()
    m.update(''.join(random_characters()).encode())
    return str(m.hexdigest())


def random_uuid():
    return str(uuid.uuid4())


random_types = {
    'string': random_string,
    'int': random_int,
    'float': random_float,
    'sha256': random_sha256,
    'uuid': random_uuid,
}


# required answerer function
# can return a list of results (any result type) for a given query
def answer(query):
    parts = query.query.split()
    if len(parts) != 2:
        return []

    if parts[1] not in random_types:
        return []

    return [{'answer': random_types[parts[1]]()}]


# required answerer function
# returns information about the answerer
def self_info():
    return {
        'name': gettext('Random value generator'),
        'description': gettext('Generate different random values'),
        'examples': ['random {}'.format(x) for x in random_types],
    }
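# e.g. the query "random uuid" yields [{'answer': '<a fresh UUID4 string>'}],
# and "random int" a value in [-2**31, 2**31]; any other second word returns [].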
+50
@@ -0,0 +1,50 @@
from functools import reduce
from operator import mul

from flask_babel import gettext


keywords = ('min', 'max', 'avg', 'sum', 'prod')


# required answerer function
# can return a list of results (any result type) for a given query
def answer(query):
    parts = query.query.split()

    if len(parts) < 2:
        return []

    try:
        args = list(map(float, parts[1:]))
    except ValueError:
        return []

    func = parts[0]
    answer = None

    if func == 'min':
        answer = min(args)
    elif func == 'max':
        answer = max(args)
    elif func == 'avg':
        answer = sum(args) / len(args)
    elif func == 'sum':
        answer = sum(args)
    elif func == 'prod':
        answer = reduce(mul, args, 1)

    if answer is None:
        return []

    return [{'answer': str(answer)}]


# required answerer function
# returns information about the answerer
def self_info():
    return {
        'name': gettext('Statistics functions'),
        'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)),
        'examples': ['avg 123 548 2.04 24.2'],
    }
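# e.g. "avg 10 20 30" -> [{'answer': '20.0'}]; non-numeric arguments make answer() return [].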
Executable
+228
@@ -0,0 +1,228 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements functions needed for the autocompleter.

"""
# pylint: disable=use-dict-literal

import json
from urllib.parse import urlencode

import lxml.etree
import lxml.html
from httpx import HTTPError

from searx import settings
from searx.engines import (
    engines,
    google,
)
from searx.network import get as http_get
from searx.exceptions import SearxEngineResponseException


def get(*args, **kwargs):
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    kwargs['raise_for_httperror'] = True
    return http_get(*args, **kwargs)


def brave(query, _lang):
    # brave search autocompleter
    url = 'https://search.brave.com/api/suggest?'
    url += urlencode({'q': query})
    country = 'all'
    # if lang in _brave:
    #     country = lang
    kwargs = {'cookies': {'country': country}}
    resp = get(url, **kwargs)

    results = []

    if resp.ok:
        data = resp.json()
        for item in data[1]:
            results.append(item)
    return results


def dbpedia(query, _lang):
    # dbpedia autocompleter
    autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'

    response = get(autocomplete_url + urlencode(dict(QueryString=query)))

    results = []

    if response.ok:
        dom = lxml.etree.fromstring(response.content)
        results = dom.xpath('//Result/Label//text()')

    return results


def duckduckgo(query, sxng_locale):
    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""

    traits = engines['duckduckgo'].traits
    args = {
        'q': query,
        'kl': traits.get_region(sxng_locale, traits.all_locale),
    }

    url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
    resp = get(url)

    ret_val = []
    if resp.ok:
        j = resp.json()
        if len(j) > 1:
            ret_val = j[1]
    return ret_val


def google_complete(query, sxng_locale):
    """Autocomplete from Google. Supports Google's languages and subdomains
    (:py:obj:`searx.engines.google.get_google_info`) by using the async REST
    API::

        https://{subdomain}/complete/search?{args}

    """

    google_info = google.get_google_info({'searxng_locale': sxng_locale}, engines['google'].traits)

    url = 'https://{subdomain}/complete/search?{args}'
    args = urlencode(
        {
            'q': query,
            'client': 'gws-wiz',
            'hl': google_info['params']['hl'],
        }
    )
    results = []
    resp = get(url.format(subdomain=google_info['subdomain'], args=args))
    if resp.ok:
        json_txt = resp.text[resp.text.find('[') : resp.text.find(']', -3) + 1]
        data = json.loads(json_txt)
        for item in data[0]:
            results.append(lxml.html.fromstring(item[0]).text_content())
    return results


def seznam(query, _lang):
    # seznam search autocompleter
    url = 'https://suggest.seznam.cz/fulltext/cs?{query}'

    resp = get(
        url.format(
            query=urlencode(
                {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
            )
        )
    )

    if not resp.ok:
        return []

    data = resp.json()
    return [
        ''.join([part.get('text', '') for part in item.get('text', [])])
        for item in data.get('result', [])
        if item.get('itemType', None) == 'ItemType.TEXT'
    ]


def startpage(query, sxng_locale):
    """Autocomplete from Startpage. Supports Startpage's languages"""
    lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
    url = 'https://startpage.com/suggestions?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
    data = resp.json()
    return [e['text'] for e in data.get('suggestions', []) if 'text' in e]


def swisscows(query, _lang):
    # swisscows autocompleter
    url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'

    resp = json.loads(get(url.format(query=urlencode({'query': query}))).text)
    return resp


def qwant(query, sxng_locale):
    """Autocomplete from Qwant. Supports Qwant's regions."""
    results = []

    locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
    url = 'https://api.qwant.com/v3/suggest?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))

    if resp.ok:
        data = resp.json()
        if data['status'] == 'success':
            for item in data['data']['items']:
                results.append(item['value'])

    return results


def wikipedia(query, sxng_locale):
    """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
    results = []
    eng_traits = engines['wikipedia'].traits
    wiki_lang = eng_traits.get_language(sxng_locale, 'en')
    wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')

    url = 'https://{wiki_netloc}/w/api.php?{args}'
    args = urlencode(
        {
            'action': 'opensearch',
            'format': 'json',
            'formatversion': '2',
            'search': query,
            'namespace': '0',
            'limit': '10',
        }
    )
    resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
    if resp.ok:
        data = resp.json()
        if len(data) > 1:
            results = data[1]

    return results


def yandex(query, _lang):
    # yandex autocompleter
    url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"

    resp = json.loads(get(url.format(urlencode(dict(part=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
    'google': google_complete,
    'seznam': seznam,
    'startpage': startpage,
    'swisscows': swisscows,
    'qwant': qwant,
    'wikipedia': wikipedia,
    'brave': brave,
    'yandex': yandex,
}


def search_autocomplete(backend_name, query, sxng_locale):
    backend = backends.get(backend_name)
    if backend is None:
        return []
    try:
        return backend(query, sxng_locale)
    except (HTTPError, SearxEngineResponseException):
        return []
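# Example (performs a live HTTP request, so results vary):
#   search_autocomplete('duckduckgo', 'searx', 'en-US')
# returns a list of suggestion strings, or [] for HTTP errors and unknown backend names.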
Executable
+51
@@ -0,0 +1,51 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements the :origin:`searxng_msg <babel.cfg>` extractor to
extract messages from:

- :origin:`searx/searxng.msg`

The ``searxng.msg`` files are selected by Babel_, see Babel's configuration in
:origin:`babel.cfg`::

    searxng_msg = searx.babel_extract.extract
    ...
    [searxng_msg: **/searxng.msg]

A ``searxng.msg`` file is a python file that is *executed* by the
:py:obj:`extract` function. Additional ``searxng.msg`` files can be added by:

1. adding a ``searxng.msg`` file in one of the SearXNG python packages and
2. implementing a method in :py:obj:`extract` that yields messages from this file.

.. _Babel: https://babel.pocoo.org/en/latest/index.html

"""

from os import path

SEARXNG_MSG_FILE = "searxng.msg"
_MSG_FILES = [path.join(path.dirname(__file__), SEARXNG_MSG_FILE)]


def extract(
    # pylint: disable=unused-argument
    fileobj,
    keywords,
    comment_tags,
    options,
):
    """Extract messages from ``searxng.msg`` files by a custom extractor_.

    .. _extractor:
       https://babel.pocoo.org/en/latest/messages.html#writing-extraction-methods
    """
    if fileobj.name not in _MSG_FILES:
        raise RuntimeError("don't know how to extract messages from %s" % fileobj.name)

    namespace = {}
    exec(fileobj.read(), {}, namespace)  # pylint: disable=exec-used

    for name in namespace['__all__']:
        for k, v in namespace[name].items():
            yield 0, '_', v, ["%s['%s']" % (name, k)]
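# For reference, extract() only assumes the executed searxng.msg module defines
# __all__ plus one dict per listed name, e.g. (hypothetical content):
#   __all__ = ['CATEGORY_NAMES']
#   CATEGORY_NAMES = {'FILES': 'files', 'GENERAL': 'general'}
# Each dict value is yielded to Babel as a translatable message.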
Executable
+27
@@ -0,0 +1,27 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. _botdetection src:

X-Forwarded-For
===============

.. attention::

   A correct setup of the HTTP request headers ``X-Forwarded-For`` and
   ``X-Real-IP`` is essential to be able to assign a request to an IP correctly:

   - `NGINX RequestHeader`_
   - `Apache RequestHeader`_

.. _NGINX RequestHeader:
   https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site
.. _Apache RequestHeader:
   https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site

.. autofunction:: searx.botdetection.get_real_ip

"""

from ._helpers import dump_request
from ._helpers import get_real_ip
from ._helpers import too_many_requests
Executable
+120
@@ -0,0 +1,120 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
# pylint: disable=missing-module-docstring, invalid-name
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
IPv4Address,
|
||||||
|
IPv6Address,
|
||||||
|
ip_network,
|
||||||
|
)
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from searx import logger
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection')
|
||||||
|
|
||||||
|
|
||||||
|
def dump_request(request: flask.Request):
|
||||||
|
return (
|
||||||
|
request.path
|
||||||
|
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
|
||||||
|
+ " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
|
||||||
|
+ " || form: %s" % request.form
|
||||||
|
+ " || Accept: %s" % request.headers.get('Accept')
|
||||||
|
+ " || Accept-Language: %s" % request.headers.get('Accept-Language')
|
||||||
|
+ " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
|
||||||
|
+ " || Content-Type: %s" % request.headers.get('Content-Type')
|
||||||
|
+ " || Content-Length: %s" % request.headers.get('Content-Length')
|
||||||
|
+ " || Connection: %s" % request.headers.get('Connection')
|
||||||
|
+ " || User-Agent: %s" % request.headers.get('User-Agent')
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
|
||||||
|
"""Returns an HTTP 429 response object and writes an ERROR message to the
|
||||||
|
'botdetection' logger. This function is used in part by the filter methods
|
||||||
|
to return the default ``Too Many Requests`` response.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
logger.debug("BLOCK %s: %s", network.compressed, log_msg)
|
||||||
|
return flask.make_response(('Too Many Requests', 429))
|
||||||
|
|
||||||
|
|
||||||
|
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
|
||||||
|
"""Returns the (client) network that the real_ip is part of."""
|
||||||
|
|
||||||
|
if real_ip.version == 6:
|
||||||
|
prefix = cfg['real_ip.ipv6_prefix']
|
||||||
|
else:
|
||||||
|
prefix = cfg['real_ip.ipv4_prefix']
|
||||||
|
network = ip_network(f"{real_ip}/{prefix}", strict=False)
|
||||||
|
# logger.debug("get_network(): %s", network.compressed)
|
||||||
|
return network
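
# Editor's sketch (illustrative values, not part of the original commit): with a
# hypothetical IPv6 prefix of 48, all addresses of one /48 collapse into the
# single monitored network that get_network() returns:
#
#     >>> ip_network('2a01:4f8:c17:1234::1/48', strict=False).compressed
#     '2a01:4f8:c17::/48'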
|
||||||
|
|
||||||
|
|
||||||
|
def get_real_ip(request: flask.Request) -> str:
|
||||||
|
"""Returns real IP of the request. Since not all proxies set all the HTTP
|
||||||
|
headers and incoming headers can be faked it may happen that the IP cannot
|
||||||
|
be determined correctly.
|
||||||
|
|
||||||
|
.. sidebar:: :py:obj:`flask.Request.remote_addr`
|
||||||
|
|
||||||
|
SearXNG uses Werkzeug's ProxyFix_ (with its default ``x_for=1``).
|
||||||
|
|
||||||
|
This function tries to get the remote IP in the order listed below,
|
||||||
|
additionally some tests are done and if inconsistencies or errors are
|
||||||
|
detected, they are logged.
|
||||||
|
|
||||||
|
The remote IP of the request is taken from (first match):
|
||||||
|
|
||||||
|
- X-Forwarded-For_ header
|
||||||
|
- `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
|
||||||
|
- :py:obj:`flask.Request.remote_addr`
|
||||||
|
|
||||||
|
.. _ProxyFix:
|
||||||
|
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
|
||||||
|
|
||||||
|
.. _X-Forwarded-For:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
forwarded_for = request.headers.get("X-Forwarded-For")
|
||||||
|
real_ip = request.headers.get('X-Real-IP')
|
||||||
|
remote_addr = request.remote_addr
|
||||||
|
# logger.debug(
|
||||||
|
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
|
||||||
|
# )
|
||||||
|
|
||||||
|
if not forwarded_for:
|
||||||
|
logger.error("X-Forwarded-For header is not set!")
|
||||||
|
else:
|
||||||
|
from .limiter import get_cfg # pylint: disable=import-outside-toplevel, cyclic-import
|
||||||
|
|
||||||
|
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
|
||||||
|
x_for: int = get_cfg()['real_ip.x_for'] # type: ignore
|
||||||
|
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
|
||||||
|
|
||||||
|
if not real_ip:
|
||||||
|
logger.error("X-Real-IP header is not set!")
|
||||||
|
|
||||||
|
if forwarded_for and real_ip and forwarded_for != real_ip:
|
||||||
|
logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
|
||||||
|
|
||||||
|
if forwarded_for and remote_addr and forwarded_for != remote_addr:
|
||||||
|
logger.warning(
|
||||||
|
"IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
|
||||||
|
)
|
||||||
|
|
||||||
|
if real_ip and remote_addr and real_ip != remote_addr:
|
||||||
|
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
|
||||||
|
|
||||||
|
request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
|
||||||
|
# logger.debug("get_real_ip() -> %s", request_ip)
|
||||||
|
return request_ip
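
# Editor's sketch (hypothetical, not part of the original commit): behaviour of
# get_real_ip() when a request arrives without any proxy headers -- it logs the
# missing headers and falls back to the WSGI ``remote_addr``.
def _demo_real_ip() -> str:
    app = flask.Flask(__name__)
    with app.test_request_context('/', environ_base={'REMOTE_ADDR': '203.0.113.7'}):
        return get_real_ip(flask.request)  # -> '203.0.113.7'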
|
||||||
Executable
+39
@@ -0,0 +1,39 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_accept``
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The ``http_accept`` method evaluates a request as the request of a bot if the
|
||||||
|
Accept_ header ..
|
||||||
|
|
||||||
|
- did not contain ``text/html``
|
||||||
|
|
||||||
|
.. _Accept:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
if 'text/html' not in request.accept_mimetypes:
|
||||||
|
return too_many_requests(network, "HTTP header Accept did not contain text/html")
|
||||||
|
return None
|
||||||
+41
@@ -0,0 +1,41 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_accept_encoding``
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
The ``http_accept_encoding`` method evaluates a request as the request of a
|
||||||
|
bot if the Accept-Encoding_ header ..
|
||||||
|
|
||||||
|
- did not contain ``gzip`` AND ``deflate`` (i.e. both values are missing)
|
||||||
|
|
||||||
|
.. _Accept-Encoding:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
|
||||||
|
if not ('gzip' in accept_list or 'deflate' in accept_list):
|
||||||
|
return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
|
||||||
|
return None
|
||||||
+35
@@ -0,0 +1,35 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_accept_language``
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
The ``http_accept_language`` method evaluates a request as the request of a bot
|
||||||
|
if the Accept-Language_ header is unset.
|
||||||
|
|
||||||
|
.. _Accept-Language:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
if request.headers.get('Accept-Language', '').strip() == '':
|
||||||
|
return too_many_requests(network, "missing HTTP header Accept-Language")
|
||||||
|
return None
|
||||||
+37
@@ -0,0 +1,37 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_connection``
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The ``http_connection`` method evaluates a request as the request of a bot if
|
||||||
|
the Connection_ header is set to ``close``.
|
||||||
|
|
||||||
|
.. _Connection:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
if request.headers.get('Connection', '').strip() == 'close':
|
||||||
|
return too_many_requests(network, "HTTP header 'Connection=close'")
|
||||||
|
return None
|
||||||
+67
@@ -0,0 +1,67 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``http_user_agent``
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The ``http_user_agent`` method evaluates a request as the request of a bot if
|
||||||
|
the User-Agent_ header is unset or matches the regular expression
|
||||||
|
:py:obj:`USER_AGENT`.
|
||||||
|
|
||||||
|
.. _User-Agent:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import re
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import too_many_requests
|
||||||
|
|
||||||
|
|
||||||
|
USER_AGENT = (
|
||||||
|
r'('
|
||||||
|
+ r'unknown'
|
||||||
|
+ r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
|
||||||
|
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
|
||||||
|
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
|
||||||
|
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
|
||||||
|
+ r'|ZmEu|BLEXBot|bitlybot'
|
||||||
|
# unmaintained Farside instances
|
||||||
|
+ r'|'
|
||||||
|
+ re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
|
||||||
|
# other bots and client to block
|
||||||
|
+ '|.*PetalBot.*'
|
||||||
|
+ r')'
|
||||||
|
)
|
||||||
|
"""Regular expression that matches the User-Agent_ of known *bots*"""
|
||||||
|
|
||||||
|
_regexp = None
|
||||||
|
|
||||||
|
|
||||||
|
def regexp_user_agent():
|
||||||
|
global _regexp # pylint: disable=global-statement
|
||||||
|
if not _regexp:
|
||||||
|
_regexp = re.compile(USER_AGENT)
|
||||||
|
return _regexp
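
# Editor's sketch (not part of the original commit): quick sanity check of the
# regular expression above against a few User-Agent strings.
def _demo_user_agent_match():
    assert regexp_user_agent().match('curl/8.0.1')
    assert regexp_user_agent().match('Mozilla/5.0 (compatible; PetalBot; ...)')
    assert not regexp_user_agent().match('Mozilla/5.0 (X11; Linux x86_64; rv:115.0)')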
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
user_agent = request.headers.get('User-Agent', 'unknown')
|
||||||
|
if regexp_user_agent().match(user_agent):
|
||||||
|
return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
|
||||||
|
return None
|
||||||
Executable
+148
@@ -0,0 +1,148 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _botdetection.ip_limit:
|
||||||
|
|
||||||
|
Method ``ip_limit``
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The ``ip_limit`` method counts requests from an IP in *sliding windows*.  If
there are too many requests in a sliding window, the request is evaluated as a
bot request.  This method requires a redis DB and needs a HTTP X-Forwarded-For_
header.  For privacy, only the hash value of an IP is stored in the redis DB,
and only for a maximum of 10 minutes.
|
||||||
|
|
||||||
|
The :py:obj:`.link_token` method can be used to investigate whether a request is
|
||||||
|
*suspicious*. To activate the :py:obj:`.link_token` method in the
|
||||||
|
:py:obj:`.ip_limit` method add the following to your
|
||||||
|
``/etc/searxng/limiter.toml``:
|
||||||
|
|
||||||
|
.. code:: toml
|
||||||
|
|
||||||
|
[botdetection.ip_limit]
|
||||||
|
link_token = true
|
||||||
|
|
||||||
|
If the :py:obj:`.link_token` method is activated and a request is *suspicious*
|
||||||
|
the request rates are reduced:
|
||||||
|
|
||||||
|
- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
|
||||||
|
- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
|
||||||
|
|
||||||
|
To intercept bots that get their IPs from a range of IPs, there is a
|
||||||
|
:py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
|
||||||
|
for a longer time. IPs stored in this sliding window have a maximum of
|
||||||
|
:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
|
||||||
|
makes a request that is not suspicious, the sliding window for this IP is
|
||||||
|
dropped.
|
||||||
|
|
||||||
|
.. _X-Forwarded-For:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
)
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
from searx.tools import config
|
||||||
|
|
||||||
|
from searx import redisdb
|
||||||
|
from searx.redislib import incr_sliding_window, drop_counter
|
||||||
|
|
||||||
|
from . import link_token
|
||||||
|
from ._helpers import (
|
||||||
|
too_many_requests,
|
||||||
|
logger,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
logger = logger.getChild('ip_limit')
|
||||||
|
|
||||||
|
BURST_WINDOW = 20
|
||||||
|
"""Time (sec) before sliding window for *burst* requests expires."""
|
||||||
|
|
||||||
|
BURST_MAX = 15
|
||||||
|
"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
|
||||||
|
|
||||||
|
BURST_MAX_SUSPICIOUS = 2
|
||||||
|
"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
|
||||||
|
|
||||||
|
LONG_WINDOW = 600
|
||||||
|
"""Time (sec) before the longer sliding window expires."""
|
||||||
|
|
||||||
|
LONG_MAX = 150
|
||||||
|
"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
||||||
|
|
||||||
|
LONG_MAX_SUSPICIOUS = 10
|
||||||
|
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
|
||||||
|
|
||||||
|
API_WINDOW = 3600
|
||||||
|
"""Time (sec) before sliding window for API requests (format != html) expires."""
|
||||||
|
|
||||||
|
API_MAX = 4
|
||||||
|
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
|
||||||
|
|
||||||
|
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
|
||||||
|
"""Time (sec) before sliding window for one suspicious IP expires."""
|
||||||
|
|
||||||
|
SUSPICIOUS_IP_MAX = 3
|
||||||
|
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(
|
||||||
|
network: IPv4Network | IPv6Network,
|
||||||
|
request: flask.Request,
|
||||||
|
cfg: config.Config,
|
||||||
|
) -> werkzeug.Response | None:
|
||||||
|
|
||||||
|
# pylint: disable=too-many-return-statements
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
|
||||||
|
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
|
||||||
|
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if request.args.get('format', 'html') != 'html':
|
||||||
|
c = incr_sliding_window(redis_client, 'ip_limit.API_WINDOW:' + network.compressed, API_WINDOW)
|
||||||
|
if c > API_MAX:
|
||||||
|
return too_many_requests(network, "too many requests in API_WINDOW")
|
||||||
|
|
||||||
|
if cfg['botdetection.ip_limit.link_token']:
|
||||||
|
|
||||||
|
suspicious = link_token.is_suspicious(network, request, True)
|
||||||
|
|
||||||
|
if not suspicious:
|
||||||
|
# this IP is no longer suspicious: release ip again / delete the counter of this IP
|
||||||
|
drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# this IP is suspicious: count requests from this IP
|
||||||
|
c = incr_sliding_window(
|
||||||
|
redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
|
||||||
|
)
|
||||||
|
if c > SUSPICIOUS_IP_MAX:
|
||||||
|
logger.error("BLOCK: too many requests from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
|
||||||
|
return flask.redirect(flask.url_for('index'), code=302)
|
||||||
|
|
||||||
|
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||||
|
if c > BURST_MAX_SUSPICIOUS:
|
||||||
|
return too_many_requests(network, "too many requests in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
|
||||||
|
|
||||||
|
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||||
|
if c > LONG_MAX_SUSPICIOUS:
|
||||||
|
return too_many_requests(network, "too many requests in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
# vanilla limiter without extensions counts BURST_MAX and LONG_MAX
|
||||||
|
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
|
||||||
|
if c > BURST_MAX:
|
||||||
|
return too_many_requests(network, "too many requests in BURST_WINDOW (BURST_MAX)")
|
||||||
|
|
||||||
|
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
|
||||||
|
if c > LONG_MAX:
|
||||||
|
return too_many_requests(network, "too many requests in LONG_WINDOW (LONG_MAX)")
|
||||||
|
|
||||||
|
return None
|
||||||
Executable
+85
@@ -0,0 +1,85 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _botdetection.ip_lists:
|
||||||
|
|
||||||
|
Method ``ip_lists``
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
|
||||||
|
:py:obj:`pass-lists <pass_ip>`.
|
||||||
|
|
||||||
|
.. code:: toml
|
||||||
|
|
||||||
|
[botdetection.ip_lists]
|
||||||
|
|
||||||
|
pass_ip = [
|
||||||
|
'140.238.172.132', # IPv4 of check.searx.space
|
||||||
|
'192.168.0.0/16', # IPv4 private network
|
||||||
|
'fe80::/10' # IPv6 linklocal
|
||||||
|
]
|
||||||
|
block_ip = [
|
||||||
|
'93.184.216.34', # IPv4 of example.org
|
||||||
|
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
|
||||||
|
]
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=unused-argument
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import Tuple
|
||||||
|
from ipaddress import (
|
||||||
|
ip_network,
|
||||||
|
IPv4Address,
|
||||||
|
IPv6Address,
|
||||||
|
)
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from ._helpers import logger
|
||||||
|
|
||||||
|
logger = logger.getChild('ip_limit')
|
||||||
|
|
||||||
|
SEARXNG_ORG = [
|
||||||
|
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
|
||||||
|
'140.238.172.132', # IPv4 check.searx.space
|
||||||
|
'2603:c022:0:4900::/56', # IPv6 check.searx.space
|
||||||
|
]
|
||||||
|
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
|
||||||
|
|
||||||
|
|
||||||
|
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
|
||||||
|
"""Checks if the IP is covered by one of the members (IPs or subnets) of the
|
||||||
|
``botdetection.ip_lists.pass_ip`` list.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
|
||||||
|
for net in SEARXNG_ORG:
|
||||||
|
net = ip_network(net, strict=False)
|
||||||
|
if real_ip.version == net.version and real_ip in net:
|
||||||
|
return True, f"IP matches {net.compressed} in SEARXNG_ORG list."
|
||||||
|
return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
|
||||||
|
|
||||||
|
|
||||||
|
def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
|
||||||
|
"""Checks if the IP is covered by one of the members (IPs or subnets) of the
|
||||||
|
``botdetection.ip_lists.block_ip`` list.
|
||||||
|
"""
|
||||||
|
|
||||||
|
block, msg = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
|
||||||
|
if block:
|
||||||
|
msg += " To remove IP from list, please contact the maintainer of the service."
|
||||||
|
return block, msg
|
||||||
|
|
||||||
|
|
||||||
|
def ip_is_subnet_of_member_in_list(
|
||||||
|
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
|
||||||
|
) -> Tuple[bool, str]:
|
||||||
|
|
||||||
|
for net in cfg.get(list_name, default=[]):
|
||||||
|
try:
|
||||||
|
net = ip_network(net, strict=False)
|
||||||
|
except ValueError:
|
||||||
|
logger.error("invalid IP %s in %s", net, list_name)
|
||||||
|
continue
|
||||||
|
if real_ip.version == net.version and real_ip in net:
|
||||||
|
return True, f"IP matches {net.compressed} in {list_name}."
|
||||||
|
return False, f"IP is not a member of an item in the {list_name} list"
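
# Editor's sketch (hypothetical addresses, not part of the original commit): the
# subnet-membership test performed above, spelled out with the stdlib only.
def _demo_subnet_membership() -> bool:
    client = IPv4Address('192.168.1.42')
    passlist_entry = ip_network('192.168.0.0/16', strict=False)
    return client.version == passlist_entry.version and client in passlist_entry  # True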
|
||||||
Executable
+147
@@ -0,0 +1,147 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
""".. _limiter src:
|
||||||
|
|
||||||
|
Limiter
|
||||||
|
=======
|
||||||
|
|
||||||
|
.. sidebar:: info
|
||||||
|
|
||||||
|
The limiter requires a :ref:`Redis <settings redis>` database.
|
||||||
|
|
||||||
|
Bot protection / IP rate limitation. The intention of rate limitation is to
|
||||||
|
limit suspicious requests from an IP. The motivation behind this is the fact
|
||||||
|
that SearXNG passes through requests from bots and is thus classified as a bot
|
||||||
|
itself. As a result, the SearXNG engine then receives a CAPTCHA or is blocked
|
||||||
|
by the search engine (the origin) in some other way.
|
||||||
|
|
||||||
|
To avoid blocking, the requests from bots to SearXNG must also be blocked; this
|
||||||
|
is the task of the limiter. To perform this task, the limiter uses the methods
|
||||||
|
from the :py:obj:`searx.botdetection`.
|
||||||
|
|
||||||
|
To enable the limiter activate:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
server:
|
||||||
|
...
|
||||||
|
limiter: true # rate limit the number of request on the instance, block some bots
|
||||||
|
|
||||||
|
and set the redis-url connection. Check the value, it depends on your redis DB
|
||||||
|
(see :ref:`settings redis`), for example:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
redis:
|
||||||
|
url: unix:///usr/local/searxng-redis/run/redis.sock?db=0
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from ipaddress import ip_address
|
||||||
|
import flask
|
||||||
|
import werkzeug
|
||||||
|
|
||||||
|
from searx.tools import config
|
||||||
|
from searx import logger
|
||||||
|
|
||||||
|
from . import (
|
||||||
|
http_accept,
|
||||||
|
http_accept_encoding,
|
||||||
|
http_accept_language,
|
||||||
|
http_connection,
|
||||||
|
http_user_agent,
|
||||||
|
ip_limit,
|
||||||
|
ip_lists,
|
||||||
|
)
|
||||||
|
|
||||||
|
from ._helpers import (
|
||||||
|
get_network,
|
||||||
|
get_real_ip,
|
||||||
|
dump_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection.limiter')
|
||||||
|
|
||||||
|
CFG: config.Config = None # type: ignore
|
||||||
|
|
||||||
|
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
|
||||||
|
"""Base configuration (schema) of the botdetection."""
|
||||||
|
|
||||||
|
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
|
||||||
|
"""Local limiter configuration."""
|
||||||
|
|
||||||
|
CFG_DEPRECATED = {
|
||||||
|
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_cfg() -> config.Config:
|
||||||
|
global CFG # pylint: disable=global-statement
|
||||||
|
if CFG is None:
|
||||||
|
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
|
||||||
|
return CFG
|
||||||
|
|
||||||
|
|
||||||
|
def filter_request(request: flask.Request) -> werkzeug.Response | None:
|
||||||
|
# pylint: disable=too-many-return-statements
|
||||||
|
|
||||||
|
cfg = get_cfg()
|
||||||
|
real_ip = ip_address(get_real_ip(request))
|
||||||
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
|
if request.path == '/healthz':
|
||||||
|
return None
|
||||||
|
|
||||||
|
# link-local
|
||||||
|
|
||||||
|
if network.is_link_local:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# block- & pass- lists
|
||||||
|
#
|
||||||
|
# 1. The IP of the request is first checked against the pass-list; if the IP
|
||||||
|
# matches an entry in the list, the request is not blocked.
|
||||||
|
# 2. If no matching entry is found in the pass-list, then a check is made against
|
||||||
|
# the block list; if the IP matches an entry in the list, the request is
|
||||||
|
# blocked.
|
||||||
|
# 3. If the IP is not in either list, the request is not blocked.
|
||||||
|
|
||||||
|
match, msg = ip_lists.pass_ip(real_ip, cfg)
|
||||||
|
if match:
|
||||||
|
logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
|
||||||
|
return None
|
||||||
|
|
||||||
|
match, msg = ip_lists.block_ip(real_ip, cfg)
|
||||||
|
if match:
|
||||||
|
logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
|
||||||
|
return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
|
||||||
|
|
||||||
|
# methods applied on /
|
||||||
|
|
||||||
|
for func in [
|
||||||
|
http_user_agent,
|
||||||
|
]:
|
||||||
|
val = func.filter_request(network, request, cfg)
|
||||||
|
if val is not None:
|
||||||
|
return val
|
||||||
|
|
||||||
|
# methods applied on /search
|
||||||
|
|
||||||
|
if request.path == '/search':
|
||||||
|
|
||||||
|
for func in [
|
||||||
|
http_accept,
|
||||||
|
http_accept_encoding,
|
||||||
|
http_accept_language,
|
||||||
|
http_connection,
|
||||||
|
http_user_agent,
|
||||||
|
ip_limit,
|
||||||
|
]:
|
||||||
|
val = func.filter_request(network, request, cfg)
|
||||||
|
if val is not None:
|
||||||
|
return val
|
||||||
|
logger.debug("OK %s: %s", network, dump_request(flask.request))
|
||||||
|
return None
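
# Editor's sketch (hypothetical wiring, not part of the original commit): how an
# application could hook filter_request() into Flask's request life cycle.  The
# real SearXNG webapp wires the limiter elsewhere; this only shows the call
# pattern.
def install_limiter(app: flask.Flask):
    @app.before_request
    def limiter_hook():  # pylint: disable=unused-variable
        # a non-None value is a ready-made response (429 or redirect)
        return filter_request(flask.request)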
|
||||||
Executable
+157
@@ -0,0 +1,157 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Method ``link_token``
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The ``link_token`` method evaluates a request as :py:obj:`suspicious
|
||||||
|
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
|
||||||
|
client. By adding a random component (the token) to the URL, a bot cannot send
|
||||||
|
a ping by requesting a static URL.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
|
||||||
|
|
||||||
|
To make use of this method a flask URL route needs to be added:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
@app.route('/client<token>.css', methods=['GET', 'POST'])
|
||||||
|
def client_token(token=None):
|
||||||
|
link_token.ping(request, token)
|
||||||
|
return Response('', mimetype='text/css')
|
||||||
|
|
||||||
|
And in the HTML template from flask a stylesheet link is needed (the value of
|
||||||
|
``link_token`` comes from :py:obj:`get_token`):
|
||||||
|
|
||||||
|
.. code:: html
|
||||||
|
|
||||||
|
<link rel="stylesheet"
|
||||||
|
href="{{ url_for('client_token', token=link_token) }}"
|
||||||
|
type="text/css" />
|
||||||
|
|
||||||
|
.. _X-Forwarded-For:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
|
||||||
|
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
from ipaddress import (
|
||||||
|
IPv4Network,
|
||||||
|
IPv6Network,
|
||||||
|
ip_address,
|
||||||
|
)
|
||||||
|
|
||||||
|
import string
|
||||||
|
import random
|
||||||
|
import flask
|
||||||
|
|
||||||
|
from searx import logger
|
||||||
|
from searx import redisdb
|
||||||
|
from searx.redislib import secret_hash
|
||||||
|
|
||||||
|
from ._helpers import (
|
||||||
|
get_network,
|
||||||
|
get_real_ip,
|
||||||
|
)
|
||||||
|
|
||||||
|
TOKEN_LIVE_TIME = 600
|
||||||
|
"""Lifetime (sec) of limiter's CSS token."""
|
||||||
|
|
||||||
|
PING_LIVE_TIME = 3600
|
||||||
|
"""Lifetime (sec) of the ping-key from a client (request)"""
|
||||||
|
|
||||||
|
PING_KEY = 'SearXNG_limiter.ping'
|
||||||
|
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
|
||||||
|
|
||||||
|
TOKEN_KEY = 'SearXNG_limiter.token'
|
||||||
|
"""Key for which the current token is stored in the DB"""
|
||||||
|
|
||||||
|
logger = logger.getChild('botdetection.link_token')
|
||||||
|
|
||||||
|
|
||||||
|
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
|
||||||
|
"""Checks whether a valid ping exists for this (client) network; if not,
|
||||||
|
this request is rated as *suspicious*. If a valid ping exists and argument
|
||||||
|
``renew`` is ``True`` the expire time of this ping is reset to
|
||||||
|
:py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
if not redis_client:
|
||||||
|
return False
|
||||||
|
|
||||||
|
ping_key = get_ping_key(network, request)
|
||||||
|
if not redis_client.get(ping_key):
|
||||||
|
logger.warning("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
|
||||||
|
return True
|
||||||
|
|
||||||
|
if renew:
|
||||||
|
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
|
logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def ping(request: flask.Request, token: str):
|
||||||
|
"""This function is called by a request to URL ``/client<token>.css``. If
|
||||||
|
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
|
||||||
|
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
from . import limiter # pylint: disable=import-outside-toplevel, cyclic-import
|
||||||
|
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
if not redis_client:
|
||||||
|
return
|
||||||
|
if not token_is_valid(token):
|
||||||
|
return
|
||||||
|
|
||||||
|
cfg = limiter.get_cfg()
|
||||||
|
real_ip = ip_address(get_real_ip(request))
|
||||||
|
network = get_network(real_ip, cfg)
|
||||||
|
|
||||||
|
ping_key = get_ping_key(network, request)
|
||||||
|
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
|
||||||
|
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
|
||||||
|
|
||||||
|
|
||||||
|
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
|
||||||
|
"""Generates a hashed key that fits (more or less) to a *WEB-browser
|
||||||
|
session* in a network."""
|
||||||
|
return (
|
||||||
|
PING_KEY
|
||||||
|
+ "["
|
||||||
|
+ secret_hash(
|
||||||
|
network.compressed + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
|
||||||
|
)
|
||||||
|
+ "]"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def token_is_valid(token) -> bool:
|
||||||
|
valid = token == get_token()
|
||||||
|
logger.debug("token is valid --> %s", valid)
|
||||||
|
return valid
|
||||||
|
|
||||||
|
|
||||||
|
def get_token() -> str:
|
||||||
|
"""Returns current token. If there is no currently active token a new token
|
||||||
|
is generated randomly and stored in the redis DB.
|
||||||
|
|
||||||
|
- :py:obj:`TOKEN_LIVE_TIME`
|
||||||
|
- :py:obj:`TOKEN_KEY`
|
||||||
|
|
||||||
|
"""
|
||||||
|
redis_client = redisdb.client()
|
||||||
|
if not redis_client:
|
||||||
|
# This function is also called when limiter is inactive / no redis DB
|
||||||
|
# (see render function in webapp.py)
|
||||||
|
return '12345678'
|
||||||
|
token = redis_client.get(TOKEN_KEY)
|
||||||
|
if token:
|
||||||
|
token = token.decode('UTF-8')
|
||||||
|
else:
|
||||||
|
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
|
||||||
|
redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
|
||||||
|
return token
|
||||||
Executable
+73
@@ -0,0 +1,73 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
# pyright: basic
|
||||||
|
"""Module for backward compatibility.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=C,R
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ('cached_property',)
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
from functools import cached_property # type: ignore
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
|
||||||
|
# cached_property has been added in py3.8 [1]
|
||||||
|
#
|
||||||
|
# To support cached_property in py3.7 the implementation from 3.8 has been
|
||||||
|
# copied here. This code can be cleaned up with EOL of py3.7.
|
||||||
|
#
|
||||||
|
# [1] https://docs.python.org/3/library/functools.html#functools.cached_property
|
||||||
|
|
||||||
|
from threading import RLock
|
||||||
|
|
||||||
|
_NOT_FOUND = object()
|
||||||
|
|
||||||
|
class cached_property:
|
||||||
|
def __init__(self, func):
|
||||||
|
self.func = func
|
||||||
|
self.attrname = None
|
||||||
|
self.__doc__ = func.__doc__
|
||||||
|
self.lock = RLock()
|
||||||
|
|
||||||
|
def __set_name__(self, owner, name):
|
||||||
|
if self.attrname is None:
|
||||||
|
self.attrname = name
|
||||||
|
elif name != self.attrname:
|
||||||
|
raise TypeError(
|
||||||
|
"Cannot assign the same cached_property to two different names "
|
||||||
|
f"({self.attrname!r} and {name!r})."
|
||||||
|
)
|
||||||
|
|
||||||
|
def __get__(self, instance, owner=None):
|
||||||
|
if instance is None:
|
||||||
|
return self
|
||||||
|
if self.attrname is None:
|
||||||
|
raise TypeError("Cannot use cached_property instance without calling __set_name__ on it.")
|
||||||
|
try:
|
||||||
|
cache = instance.__dict__
|
||||||
|
except AttributeError: # not all objects have __dict__ (e.g. class defines slots)
|
||||||
|
msg = (
|
||||||
|
f"No '__dict__' attribute on {type(instance).__name__!r} "
|
||||||
|
f"instance to cache {self.attrname!r} property."
|
||||||
|
)
|
||||||
|
raise TypeError(msg) from None
|
||||||
|
val = cache.get(self.attrname, _NOT_FOUND)
|
||||||
|
if val is _NOT_FOUND:
|
||||||
|
with self.lock:
|
||||||
|
# check if another thread filled cache while we awaited lock
|
||||||
|
val = cache.get(self.attrname, _NOT_FOUND)
|
||||||
|
if val is _NOT_FOUND:
|
||||||
|
val = self.func(instance)
|
||||||
|
try:
|
||||||
|
cache[self.attrname] = val
|
||||||
|
except TypeError:
|
||||||
|
msg = (
|
||||||
|
f"The '__dict__' attribute on {type(instance).__name__!r} instance "
|
||||||
|
f"does not support item assignment for caching {self.attrname!r} property."
|
||||||
|
)
|
||||||
|
raise TypeError(msg) from None
|
||||||
|
return val
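

# Editor's sketch (not part of the original commit): typical use of the
# cached_property fallback defined above -- the value is computed once per
# instance and then served from the instance's __dict__.
class _Example:
    @cached_property
    def answer(self):
        print("computing ...")  # printed only on first access
        return 21 * 2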
|
||||||
Executable
+52
@@ -0,0 +1,52 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This module holds the *data* created by::
|
||||||
|
|
||||||
|
make data.all
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'ENGINE_TRAITS',
|
||||||
|
'CURRENCIES',
|
||||||
|
'USER_AGENTS',
|
||||||
|
'EXTERNAL_URLS',
|
||||||
|
'WIKIDATA_UNITS',
|
||||||
|
'EXTERNAL_BANGS',
|
||||||
|
'OSM_KEYS_TAGS',
|
||||||
|
'ENGINE_DESCRIPTIONS',
|
||||||
|
'ahmia_blacklist_loader',
|
||||||
|
]
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
data_dir = Path(__file__).parent
|
||||||
|
|
||||||
|
|
||||||
|
def _load(filename):
|
||||||
|
with open(data_dir / filename, encoding='utf-8') as f:
|
||||||
|
return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def ahmia_blacklist_loader():
|
||||||
|
"""Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
|
||||||
|
names. The MD5 values are fetched by::
|
||||||
|
|
||||||
|
searxng_extra/update/update_ahmia_blacklist.py
|
||||||
|
|
||||||
|
This function is used by :py:mod:`searx.plugins.ahmia_filter`.
|
||||||
|
|
||||||
|
"""
|
||||||
|
with open(data_dir / 'ahmia_blacklist.txt', encoding='utf-8') as f:
|
||||||
|
return f.read().split()
|
||||||
|
|
||||||
|
|
||||||
|
CURRENCIES = _load('currencies.json')
|
||||||
|
USER_AGENTS = _load('useragents.json')
|
||||||
|
EXTERNAL_URLS = _load('external_urls.json')
|
||||||
|
WIKIDATA_UNITS = _load('wikidata_units.json')
|
||||||
|
EXTERNAL_BANGS = _load('external_bangs.json')
|
||||||
|
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
|
||||||
|
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
|
||||||
|
ENGINE_TRAITS = _load('engine_traits.json')
|
||||||
Executable
+145
@@ -0,0 +1,145 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Implementations of the framework for the SearXNG engines.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
The long term goal is to modularize all implementations of the engine
|
||||||
|
framework here in this Python package. ToDo:
|
||||||
|
|
||||||
|
- move implementations of the :ref:`searx.engines loader` to a new module in
|
||||||
|
the :py:obj:`searx.enginelib` namespace.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
from typing import List, Callable, TYPE_CHECKING
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from searx.enginelib import traits
|
||||||
|
|
||||||
|
|
||||||
|
class Engine: # pylint: disable=too-few-public-methods
|
||||||
|
"""Class of engine instances built from YAML settings.
|
||||||
|
|
||||||
|
Further documentation see :ref:`general engine configuration`.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
This class is currently never initialized and only used for type hinting.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Common options in the engine module
|
||||||
|
|
||||||
|
engine_type: str
|
||||||
|
"""Type of the engine (:ref:`searx.search.processors`)"""
|
||||||
|
|
||||||
|
paging: bool
|
||||||
|
"""Engine supports multiple pages."""
|
||||||
|
|
||||||
|
time_range_support: bool
|
||||||
|
"""Engine supports search time range."""
|
||||||
|
|
||||||
|
safesearch: bool
|
||||||
|
"""Engine supports SafeSearch"""
|
||||||
|
|
||||||
|
language_support: bool
|
||||||
|
"""Engine supports languages (locales) search."""
|
||||||
|
|
||||||
|
language: str
|
||||||
|
"""For an engine, when there is ``language: ...`` in the YAML settings the engine
|
||||||
|
supports only this one language:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: google french
|
||||||
|
engine: google
|
||||||
|
language: fr
|
||||||
|
"""
|
||||||
|
|
||||||
|
region: str
|
||||||
|
"""For an engine, when there is ``region: ...`` in the YAML settings the engine
|
||||||
|
supports only this one region:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: google belgium
|
||||||
|
engine: google
|
||||||
|
region: fr-BE
|
||||||
|
"""
|
||||||
|
|
||||||
|
fetch_traits: Callable
|
||||||
|
"""Function to fetch engine's traits from origin."""
|
||||||
|
|
||||||
|
traits: traits.EngineTraits
|
||||||
|
"""Traits of the engine."""
|
||||||
|
|
||||||
|
# settings.yml
|
||||||
|
|
||||||
|
categories: List[str]
|
||||||
|
"""Specifies to which :ref:`engine categories` the engine should be added."""
|
||||||
|
|
||||||
|
name: str
|
||||||
|
"""Name that will be used across SearXNG to define this engine. In settings, on
|
||||||
|
the result page .."""
|
||||||
|
|
||||||
|
engine: str
|
||||||
|
"""Name of the python file used to handle requests and responses to and from
|
||||||
|
this search engine (file name from :origin:`searx/engines` without
|
||||||
|
``.py``)."""
|
||||||
|
|
||||||
|
enable_http: bool
|
||||||
|
"""Enable HTTP (by default only HTTPS is enabled)."""
|
||||||
|
|
||||||
|
shortcut: str
|
||||||
|
"""Code used to execute bang requests (``!foo``)"""
|
||||||
|
|
||||||
|
timeout: float
|
||||||
|
"""Specific timeout for search-engine."""
|
||||||
|
|
||||||
|
display_error_messages: bool
|
||||||
|
"""Display error messages on the web UI."""
|
||||||
|
|
||||||
|
proxies: dict
|
||||||
|
"""Set proxies for a specific engine (YAML):
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
proxies :
|
||||||
|
http: socks5://proxy:port
|
||||||
|
https: socks5://proxy:port
|
||||||
|
"""
|
||||||
|
|
||||||
|
disabled: bool
|
||||||
|
"""Disable the engine by default, but without deleting it; the user can still
activate it manually in the settings."""
|
||||||
|
|
||||||
|
inactive: bool
|
||||||
|
"""Remove the engine from the settings (*disabled & removed*)."""
|
||||||
|
|
||||||
|
about: dict
|
||||||
|
"""Additional fields describing the engine.
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
about:
|
||||||
|
website: https://example.com
|
||||||
|
wikidata_id: Q306656
|
||||||
|
official_api_documentation: https://example.com/api-doc
|
||||||
|
use_official_api: true
|
||||||
|
require_api_key: true
|
||||||
|
results: HTML
|
||||||
|
"""
|
||||||
|
|
||||||
|
using_tor_proxy: bool
|
||||||
|
"""Using tor proxy (``true``) or not (``false``) for this engine."""
|
||||||
|
|
||||||
|
send_accept_language_header: bool
|
||||||
|
"""When this option is activated, the language (locale) that is selected by
|
||||||
|
the user is used to build and send a ``Accept-Language`` header in the
|
||||||
|
request to the origin search engine."""
|
||||||
|
|
||||||
|
tokens: List[str]
|
||||||
|
"""A list of secret tokens to make this engine *private*, more details see
|
||||||
|
:ref:`private engines`."""
|
||||||
Executable
+252
@@ -0,0 +1,252 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Engine's traits are fetched from the origin engines and stored in a JSON file
|
||||||
|
in the *data folder*. Most often traits are languages and region codes and
|
||||||
|
their mapping from SearXNG's representation to the representation in the origin
|
||||||
|
search engine. For new traits new properties can be added to the class
|
||||||
|
:py:class:`EngineTraits`.
|
||||||
|
|
||||||
|
To load traits from the persistence :py:obj:`EngineTraitsMap.from_data` can be
|
||||||
|
used.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
import json
|
||||||
|
import dataclasses
|
||||||
|
import types
|
||||||
|
from typing import Dict, Iterable, Union, Callable, Optional, TYPE_CHECKING
|
||||||
|
from typing_extensions import Literal, Self
|
||||||
|
|
||||||
|
from searx import locales
|
||||||
|
from searx.data import data_dir, ENGINE_TRAITS
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from . import Engine
|
||||||
|
|
||||||
|
|
||||||
|
class EngineTraitsEncoder(json.JSONEncoder):
|
||||||
|
"""Encodes :class:`EngineTraits` to a serializable object, see
|
||||||
|
:class:`json.JSONEncoder`."""
|
||||||
|
|
||||||
|
def default(self, o):
|
||||||
|
"""Return dictionary of a :class:`EngineTraits` object."""
|
||||||
|
if isinstance(o, EngineTraits):
|
||||||
|
return o.__dict__
|
||||||
|
return super().default(o)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class EngineTraits:
|
||||||
|
"""The class is intended to be instantiated for each engine."""
|
||||||
|
|
||||||
|
regions: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||||
|
"""Maps SearXNG's internal representation of a region to the one of the engine.
|
||||||
|
|
||||||
|
SearXNG's internal representation can be parsed by babel and the value is
|
||||||
|
sent to the engine:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
regions = {
|
||||||
|
'fr-BE' : <engine's region name>,
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, engine_region in regions.items():
|
||||||
|
searxng_region = babel.Locale.parse(key, sep='-')
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
languages: Dict[str, str] = dataclasses.field(default_factory=dict)
|
||||||
|
"""Maps SearXNG's internal representation of a language to the one of the engine.
|
||||||
|
|
||||||
|
SearXNG's internal representation can be parsed by babel and the value is
|
||||||
|
sent to the engine:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
languages = {
|
||||||
|
'ca' : <engine's language name>,
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, engine_lang in languages.items():
|
||||||
|
searxng_lang = babel.Locale.parse(key)
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
|
||||||
|
all_locale: Optional[str] = None
|
||||||
|
"""To which locale value SearXNG's ``all`` language is mapped (shown as "Default
|
||||||
|
language").
|
||||||
|
"""
|
||||||
|
|
||||||
|
data_type: Literal['traits_v1'] = 'traits_v1'
|
||||||
|
"""Data type, default is 'traits_v1'.
|
||||||
|
"""
|
||||||
|
|
||||||
|
custom: Dict[str, Union[Dict[str, Dict], Iterable[str]]] = dataclasses.field(default_factory=dict)
|
||||||
|
"""A place to store engine's custom traits, not related to the SearXNG core.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def get_language(self, searxng_locale: str, default=None):
|
||||||
|
"""Return engine's language string that *best fits* to SearXNG's locale.
|
||||||
|
|
||||||
|
:param searxng_locale: SearXNG's internal representation of locale
|
||||||
|
selected by the user.
|
||||||
|
|
||||||
|
:param default: engine's default language
|
||||||
|
|
||||||
|
The *best fits* rules are implemented in
|
||||||
|
:py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``
|
||||||
|
which is determined from :py:obj:`EngineTraits.all_locale`.
|
||||||
|
"""
|
||||||
|
if searxng_locale == 'all' and self.all_locale is not None:
|
||||||
|
return self.all_locale
|
||||||
|
return locales.get_engine_locale(searxng_locale, self.languages, default=default)
|
||||||
|
|
||||||
|
def get_region(self, searxng_locale: str, default=None):
|
||||||
|
"""Return engine's region string that best fits to SearXNG's locale.
|
||||||
|
|
||||||
|
:param searxng_locale: SearXNG's internal representation of locale
|
||||||
|
selected by the user.
|
||||||
|
|
||||||
|
:param default: engine's default region
|
||||||
|
|
||||||
|
The *best fits* rules are implemented in
|
||||||
|
:py:obj:`searx.locales.get_engine_locale`. Except for the special value ``all``
|
||||||
|
which is determined from :py:obj:`EngineTraits.all_locale`.
|
||||||
|
"""
|
||||||
|
if searxng_locale == 'all' and self.all_locale is not None:
|
||||||
|
return self.all_locale
|
||||||
|
return locales.get_engine_locale(searxng_locale, self.regions, default=default)
|
||||||
|
|
||||||
|
def is_locale_supported(self, searxng_locale: str) -> bool:
|
||||||
|
"""A *locale* (SearXNG's internal representation) is considered to be
|
||||||
|
supported by the engine if the *region* or the *language* is supported
|
||||||
|
by the engine.
|
||||||
|
|
||||||
|
For verification the functions :py:func:`EngineTraits.get_region` and
|
||||||
|
:py:func:`EngineTraits.get_language` are used.
|
||||||
|
"""
|
||||||
|
if self.data_type == 'traits_v1':
|
||||||
|
return bool(self.get_region(searxng_locale) or self.get_language(searxng_locale))
|
||||||
|
|
||||||
|
raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||||
|
|
||||||
|
def copy(self):
|
||||||
|
"""Create a copy of the dataclass object."""
|
||||||
|
return EngineTraits(**dataclasses.asdict(self))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fetch_traits(cls, engine: Engine) -> Union[Self, None]:
|
||||||
|
"""Call a function ``fetch_traits(engine_traits)`` from engines namespace to fetch
|
||||||
|
and set properties from the origin engine in the object ``engine_traits``. If
|
||||||
|
function does not exist, ``None`` is returned.
|
||||||
|
"""
|
||||||
|
|
||||||
|
fetch_traits = getattr(engine, 'fetch_traits', None)
|
||||||
|
engine_traits = None
|
||||||
|
|
||||||
|
if fetch_traits:
|
||||||
|
engine_traits = cls()
|
||||||
|
fetch_traits(engine_traits)
|
||||||
|
return engine_traits
|
||||||
|
|
||||||
|
def set_traits(self, engine: Engine):
|
||||||
|
"""Set traits from self object in a :py:obj:`.Engine` namespace.
|
||||||
|
|
||||||
|
:param engine: engine instance built by :py:func:`searx.engines.load_engine`
|
||||||
|
"""
|
||||||
|
|
||||||
|
if self.data_type == 'traits_v1':
|
||||||
|
self._set_traits_v1(engine)
|
||||||
|
else:
|
||||||
|
raise TypeError('engine traits of type %s is unknown' % self.data_type)
|
||||||
|
|
||||||
|
def _set_traits_v1(self, engine: Engine):
|
||||||
|
# For an engine, when there is `language: ...` in the YAML settings the engine
|
||||||
|
# does support only this one language (region)::
|
||||||
|
#
|
||||||
|
# - name: google italian
|
||||||
|
# engine: google
|
||||||
|
# language: it
|
||||||
|
# region: it-IT
|
||||||
|
|
||||||
|
traits = self.copy()
|
||||||
|
|
||||||
|
_msg = "settings.yml - engine: '%s' / %s: '%s' not supported"
|
||||||
|
|
||||||
|
languages = traits.languages
|
||||||
|
if hasattr(engine, 'language'):
|
||||||
|
if engine.language not in languages:
|
||||||
|
raise ValueError(_msg % (engine.name, 'language', engine.language))
|
||||||
|
traits.languages = {engine.language: languages[engine.language]}
|
||||||
|
|
||||||
|
regions = traits.regions
|
||||||
|
if hasattr(engine, 'region'):
|
||||||
|
if engine.region not in regions:
|
||||||
|
raise ValueError(_msg % (engine.name, 'region', engine.region))
|
||||||
|
traits.regions = {engine.region: regions[engine.region]}
|
||||||
|
|
||||||
|
engine.language_support = bool(traits.languages or traits.regions)
|
||||||
|
|
||||||
|
# set the copied & modified traits in engine's namespace
|
||||||
|
engine.traits = traits
|
||||||
|
|
||||||
|
|
||||||
|
class EngineTraitsMap(Dict[str, EngineTraits]):
|
||||||
|
"""A python dictionary to map :class:`EngineTraits` by engine name."""
|
||||||
|
|
||||||
|
ENGINE_TRAITS_FILE = (data_dir / 'engine_traits.json').resolve()
|
||||||
|
"""File with persistence of the :py:obj:`EngineTraitsMap`."""
|
||||||
|
|
||||||
|
def save_data(self):
|
||||||
|
"""Store EngineTraitsMap in file :py:obj:`self.ENGINE_TRAITS_FILE`"""
|
||||||
|
with open(self.ENGINE_TRAITS_FILE, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(self, f, indent=2, sort_keys=True, cls=EngineTraitsEncoder)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_data(cls) -> Self:
|
||||||
|
"""Instantiate :class:`EngineTraitsMap` object from :py:obj:`ENGINE_TRAITS`"""
|
||||||
|
obj = cls()
|
||||||
|
for k, v in ENGINE_TRAITS.items():
|
||||||
|
obj[k] = EngineTraits(**v)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fetch_traits(cls, log: Callable) -> Self:
|
||||||
|
from searx import engines # pylint: disable=cyclic-import, import-outside-toplevel
|
||||||
|
|
||||||
|
names = list(engines.engines)
|
||||||
|
names.sort()
|
||||||
|
obj = cls()
|
||||||
|
|
||||||
|
for engine_name in names:
|
||||||
|
engine = engines.engines[engine_name]
|
||||||
|
|
||||||
|
traits = EngineTraits.fetch_traits(engine)
|
||||||
|
if traits is not None:
|
||||||
|
log("%-20s: SearXNG languages --> %s " % (engine_name, len(traits.languages)))
|
||||||
|
log("%-20s: SearXNG regions --> %s" % (engine_name, len(traits.regions)))
|
||||||
|
obj[engine_name] = traits
|
||||||
|
|
||||||
|
return obj
|
||||||
|
|
||||||
|
def set_traits(self, engine: Engine | types.ModuleType):
|
||||||
|
"""Set traits in a :py:obj:`Engine` namespace.
|
||||||
|
|
||||||
|
:param engine: engine instance built by :py:func:`searx.engines.load_engine`
|
||||||
|
"""
|
||||||
|
|
||||||
|
engine_traits = EngineTraits(data_type='traits_v1')
|
||||||
|
if engine.name in self.keys():
|
||||||
|
engine_traits = self[engine.name]
|
||||||
|
|
||||||
|
elif engine.engine in self.keys():
|
||||||
|
# The key of the dictionary traits_map is the *engine name*
|
||||||
|
# configured in settings.yml. When multiple engines are configured
|
||||||
|
# in settings.yml to use the same origin engine (python module)
|
||||||
|
# these additional engines can use the languages from the origin
|
||||||
|
# engine. For this use the configured ``engine: ...`` from
|
||||||
|
# settings.yml
|
||||||
|
engine_traits = self[engine.engine]
|
||||||
|
|
||||||
|
engine_traits.set_traits(engine)
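

# Editor's sketch (not part of the original commit): listing which engines from
# the persisted traits support a given SearXNG locale.
def _demo_locale_support(searxng_locale: str = 'fr') -> list:
    traits_map = EngineTraitsMap.from_data()
    return sorted(name for name, traits in traits_map.items() if traits.is_locale_supported(searxng_locale))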
|
||||||
Executable
+57
@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 1337x
"""

from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": 'https://1337x.to/',
    "wikidata_id": 'Q28134166',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'
categories = ['files']
paging = True


def request(query, params):
    params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'])

    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, '//table[contains(@class, "table-list")]/tbody//tr'):
        href = urljoin(url, eval_xpath_getindex(result, './td[contains(@class, "name")]/a[2]/@href', 0))
        title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]'))
        seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]'))
        leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]'))
        filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))
        filesize, filesize_multiplier = filesize_info.split()
        filesize = get_torrent_size(filesize, filesize_multiplier)

        results.append(
            {
                'url': href,
                'title': title,
                'seed': seed,
                'leech': leech,
                'filesize': filesize,
                'template': 'torrent.html',
            }
        )

    return results
Executable
+77
@@ -0,0 +1,77 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
# pylint: disable=invalid-name
"""9GAG (social media)"""

from json import loads
from datetime import datetime
from urllib.parse import urlencode

about = {
    "website": 'https://9gag.com/',
    "wikidata_id": 'Q277421',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['social media']
paging = True

search_url = "https://9gag.com/v1/search-posts?{query}"
page_size = 10


def request(query, params):
    query = urlencode({'query': query, 'c': (params['pageno'] - 1) * page_size})

    params['url'] = search_url.format(query=query)

    return params


def response(resp):
    results = []

    json_results = loads(resp.text)['data']

    for result in json_results['posts']:
        result_type = result['type']

        # Use the uncropped version as thumbnail unless the image is too tall
        if result['images']['image700']['height'] > 400:
            thumbnail = result['images']['imageFbThumbnail']['url']
        else:
            thumbnail = result['images']['image700']['url']

        if result_type == 'Photo':
            results.append(
                {
                    'template': 'images.html',
                    'url': result['url'],
                    'title': result['title'],
                    'content': result['description'],
                    'publishedDate': datetime.utcfromtimestamp(result['creationTs']),
                    'img_src': result['images']['image700']['url'],
                    'thumbnail_src': thumbnail,
                }
            )
        elif result_type == 'Animated':
            results.append(
                {
                    'template': 'videos.html',
                    'url': result['url'],
                    'title': result['title'],
                    'content': result['description'],
                    'publishedDate': datetime.utcfromtimestamp(result['creationTs']),
                    'thumbnail': thumbnail,
                    'iframe_src': result['images'].get('image460sv', {}).get('url'),
                }
            )

    if 'tags' in json_results:
        for suggestion in json_results['tags']:
            results.append({'suggestion': suggestion['key']})

    return results
Executable
+253
@@ -0,0 +1,253 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Load and initialize the ``engines``, see :py:func:`load_engines` and register
:py:obj:`engine_shortcuts`.

usage::

    load_engines( settings['engines'] )

"""

from __future__ import annotations

import sys
import copy
from os.path import realpath, dirname

from typing import TYPE_CHECKING, Dict
import types
import inspect

from searx import logger, settings
from searx.utils import load_module

if TYPE_CHECKING:
    from searx.enginelib import Engine

logger = logger.getChild('engines')
ENGINE_DIR = dirname(realpath(__file__))
ENGINE_DEFAULT_ARGS = {
    # Common options in the engine module
    "engine_type": "online",
    "paging": False,
    "time_range_support": False,
    "safesearch": False,
    # settings.yml
    "categories": ["general"],
    "enable_http": False,
    "shortcut": "-",
    "timeout": settings["outgoing"]["request_timeout"],
    "display_error_messages": True,
    "disabled": False,
    "inactive": False,
    "about": {},
    "using_tor_proxy": False,
    "send_accept_language_header": False,
    "tokens": [],
}
# set automatically when an engine does not have any tab category
DEFAULT_CATEGORY = 'other'


# Defaults for the namespace of an engine module, see :py:func:`load_engine`

categories = {'general': []}
engines: Dict[str, Engine | types.ModuleType] = {}
engine_shortcuts = {}
"""Simple map of registered *shortcuts* to name of the engine (or ``None``).

::

    engine_shortcuts[engine.shortcut] = engine.name

:meta hide-value:
"""


def check_engine_module(module: types.ModuleType):
    # probe unintentional name collisions / for example name collisions caused
    # by import statements in the engine module ..

    # network: https://github.com/searxng/searxng/issues/762#issuecomment-1605323861
    obj = getattr(module, 'network', None)
    if obj and inspect.ismodule(obj):
        msg = f'type of {module.__name__}.network is a module ({obj.__name__}), expected a string'
        # logger.error(msg)
        raise TypeError(msg)


def load_engine(engine_data: dict) -> Engine | types.ModuleType | None:
    """Load engine from ``engine_data``.

    :param dict engine_data: Attributes from YAML ``settings:engines/<engine>``
    :return: initialized namespace of the ``<engine>``.

    1. create a namespace and load module of the ``<engine>``
    2. update namespace with the defaults from :py:obj:`ENGINE_DEFAULT_ARGS`
    3. update namespace with values from ``engine_data``

    If engine *is active*, return namespace of the engine, otherwise return
    ``None``.

    This function also returns ``None`` if initialization of the namespace fails
    for one of the following reasons:

    - engine name contains underscore
    - engine name is not lowercase
    - required attribute is not set :py:func:`is_missing_required_attributes`

    """
    # pylint: disable=too-many-return-statements

    engine_name = engine_data.get('name')
    if engine_name is None:
        logger.error('An engine does not have a "name" field')
        return None
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        return None

    if engine_name.lower() != engine_name:
        logger.warning('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    # load_module
    module_name = engine_data.get('engine')
    if module_name is None:
        logger.error('The "engine" field is missing for the engine named "{}"'.format(engine_name))
        return None
    try:
        engine = load_module(module_name + '.py', ENGINE_DIR)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError):
        logger.exception('Fatal exception in engine "{}"'.format(module_name))
        sys.exit(1)
    except BaseException:
        logger.exception('Cannot load engine "{}"'.format(module_name))
        return None

    check_engine_module(engine)
    update_engine_attributes(engine, engine_data)
    update_attributes_for_tor(engine)

    # avoid cyclic imports
    # pylint: disable=import-outside-toplevel
    from searx.enginelib.traits import EngineTraitsMap

    trait_map = EngineTraitsMap.from_data()
    trait_map.set_traits(engine)

    if not is_engine_active(engine):
        return None

    if is_missing_required_attributes(engine):
        return None

    set_loggers(engine, engine_name)

    if not any(cat in settings['categories_as_tabs'] for cat in engine.categories):
        engine.categories.append(DEFAULT_CATEGORY)

    return engine


def set_loggers(engine, engine_name):
    # set the logger for engine
    engine.logger = logger.getChild(engine_name)
    # the engine may have loaded some other engines,
    # make sure their logger is initialized as well
    # use sys.modules.copy() to avoid "RuntimeError: dictionary changed size during iteration"
    # see https://github.com/python/cpython/issues/89516
    # and https://docs.python.org/3.10/library/sys.html#sys.modules
    modules = sys.modules.copy()
    for module_name, module in modules.items():
        if (
            module_name.startswith("searx.engines")
            and module_name != "searx.engines.__init__"
            and not hasattr(module, "logger")
        ):
            module_engine_name = module_name.split(".")[-1]
            module.logger = logger.getChild(module_engine_name)  # type: ignore


def update_engine_attributes(engine: Engine | types.ModuleType, engine_data):
    # set engine attributes from engine_data
    for param_name, param_value in engine_data.items():
        if param_name == 'categories':
            if isinstance(param_value, str):
                param_value = list(map(str.strip, param_value.split(',')))
            engine.categories = param_value  # type: ignore
        elif hasattr(engine, 'about') and param_name == 'about':
            engine.about = {**engine.about, **engine_data['about']}  # type: ignore
        else:
            setattr(engine, param_name, param_value)

    # set default attributes
    for arg_name, arg_value in ENGINE_DEFAULT_ARGS.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, copy.deepcopy(arg_value))


def update_attributes_for_tor(engine: Engine | types.ModuleType):
    if using_tor_proxy(engine) and hasattr(engine, 'onion_url'):
        engine.search_url = engine.onion_url + getattr(engine, 'search_path', '')  # type: ignore
        engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)  # type: ignore


def is_missing_required_attributes(engine):
    """An attribute is required when its name doesn't start with ``_`` (underline).
    Required attributes must not be ``None``.

    """
    missing = False
    for engine_attr in dir(engine):
        if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr))
            missing = True
    return missing


def using_tor_proxy(engine: Engine | types.ModuleType):
    """Return True if the engine configuration declares to use Tor."""
    return settings['outgoing'].get('using_tor_proxy') or getattr(engine, 'using_tor_proxy', False)


def is_engine_active(engine: Engine | types.ModuleType):
    # check if engine is inactive
    if engine.inactive is True:
        return False

    # exclude onion engines if not using tor
    if 'onions' in engine.categories and not using_tor_proxy(engine):
        return False

    return True


def register_engine(engine: Engine | types.ModuleType):
    if engine.name in engines:
        logger.error('Engine config error: ambiguous name: {0}'.format(engine.name))
        sys.exit(1)
    engines[engine.name] = engine

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambiguous shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)
    engine_shortcuts[engine.shortcut] = engine.name

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)


def load_engines(engine_list):
    """usage: ``engine_list = settings['engines']``"""
    engines.clear()
    engine_shortcuts.clear()
    categories.clear()
    categories['general'] = []
    for engine_data in engine_list:
        engine = load_engine(engine_data)
        if engine:
            register_engine(engine)
    return engines
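The loader is driven entirely by the ``engines:`` list from settings.yml. A rough, hypothetical call sequence follows; the engine names and shortcuts are examples only, and a fully initialized settings object is assumed, since load_engine() reads outgoing.request_timeout and categories_as_tabs from it.

    # hypothetical usage sketch -- requires an initialized SearXNG settings
    from searx import engines

    engine_list = [
        {'name': 'arxiv', 'engine': 'arxiv', 'shortcut': 'arx'},
        {'name': 'apk mirror', 'engine': 'apkmirror', 'shortcut': 'apkm'},
    ]
    engines.load_engines(engine_list)        # fills engines.engines, engine_shortcuts and categories
    print(engines.engine_shortcuts['arx'])   # -> 'arxiv'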
Executable
+80
@@ -0,0 +1,80 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Ahmia (Onions)
"""

from urllib.parse import urlencode, urlparse, parse_qs
from lxml.html import fromstring
from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath

# about
about = {
    "website": 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion',
    "wikidata_id": 'Q18693938',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine config
categories = ['onions']
paging = True
page_size = 10

# search url
search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}'
time_range_support = True
time_range_dict = {'day': 1, 'week': 7, 'month': 30}

# xpaths
results_xpath = '//li[@class="result"]'
url_xpath = './h4/a/@href'
title_xpath = './h4/a[1]'
content_xpath = './/p[1]'
correction_xpath = '//*[@id="didYouMean"]//a'
number_of_results_xpath = '//*[@id="totalResults"]'


def request(query, params):
    params['url'] = search_url.format(query=urlencode({'q': query}))

    if params['time_range'] in time_range_dict:
        params['url'] += '&' + urlencode({'d': time_range_dict[params['time_range']]})

    return params


def response(resp):
    results = []
    dom = fromstring(resp.text)

    # trim results so there are not far too many at once
    first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1)
    all_results = eval_xpath_list(dom, results_xpath)
    trimmed_results = all_results[first_result_index : first_result_index + page_size]

    # get results
    for result in trimmed_results:
        # remove ahmia url and extract the actual url for the result
        raw_url = extract_url(eval_xpath_list(result, url_xpath, min_len=1), search_url)
        cleaned_url = parse_qs(urlparse(raw_url).query).get('redirect_url', [''])[0]

        title = extract_text(eval_xpath(result, title_xpath))
        content = extract_text(eval_xpath(result, content_xpath))

        results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True})

    # get spelling corrections
    for correction in eval_xpath_list(dom, correction_xpath):
        results.append({'correction': extract_text(correction)})

    # get number of results
    number_of_results = eval_xpath(dom, number_of_results_xpath)
    if number_of_results:
        try:
            results.append({'number_of_results': int(extract_text(number_of_results))})
        except:
            pass

    return results
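Since Ahmia returns its whole result list at once, paging is emulated by the slice above: with page_size = 10 and pageno = 3, first_result_index is 10 * (3 - 1) = 20, so only all_results[20:30] are turned into SearXNG results.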
Executable
+187
@@ -0,0 +1,187 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""`Anna's Archive`_ is a free non-profit online shadow library metasearch
engine providing access to a variety of book resources (also via IPFS), created
by a team of anonymous archivists (AnnaArchivist_).

.. _Anna's Archive: https://annas-archive.org/
.. _AnnaArchivist: https://annas-software.org/AnnaArchivist/annas-archive

Configuration
=============

The engine has the following additional settings:

- :py:obj:`aa_content`
- :py:obj:`aa_ext`
- :py:obj:`aa_sort`

With these options a SearXNG maintainer is able to configure **additional**
engines for specific searches in Anna's Archive.  For example, an engine to
search for *newest* articles and journals (PDF) / by shortcut ``!aaa <search-term>``.

.. code:: yaml

  - name: annas articles
    engine: annas_archive
    shortcut: aaa
    aa_content: 'journal_article'
    aa_ext: 'pdf'
    aa_sort: 'newest'

Implementations
===============

"""

from typing import List, Dict, Any, Optional
from urllib.parse import quote
from lxml import html

from searx.utils import extract_text, eval_xpath, eval_xpath_list
from searx.enginelib.traits import EngineTraits
from searx.data import ENGINE_TRAITS

# about
about: Dict[str, Any] = {
    "website": "https://annas-archive.org/",
    "wikidata_id": "Q115288326",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# engine dependent config
categories: List[str] = ["files"]
paging: bool = False

# search-url
base_url: str = "https://annas-archive.org"
aa_content: str = ""
"""Anna's search form field **Content** / possible values::

    journal_article, book_any, book_fiction, book_unknown, book_nonfiction,
    book_comic, magazine, standards_document

To not filter use an empty string (default).
"""
aa_sort: str = ''
"""Sort Anna's results, possible values::

    newest, oldest, largest, smallest

To sort by *most relevant* use an empty string (default)."""

aa_ext: str = ''
"""Filter Anna's results by a file ending.  Common filters for example are
``pdf`` and ``epub``.

.. note::

    Anna's Archive is a beta release: filtering results by file extension does
    not really work on Anna's Archive.

"""


def init(engine_settings=None):  # pylint: disable=unused-argument
    """Check the engine's settings."""
    traits = EngineTraits(**ENGINE_TRAITS['annas archive'])

    if aa_content and aa_content not in traits.custom['content']:
        raise ValueError(f'invalid setting content: {aa_content}')

    if aa_sort and aa_sort not in traits.custom['sort']:
        raise ValueError(f'invalid setting sort: {aa_sort}')

    if aa_ext and aa_ext not in traits.custom['ext']:
        raise ValueError(f'invalid setting ext: {aa_ext}')


def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
    q = quote(query)
    lang = traits.get_language(params["language"], traits.all_locale)  # type: ignore
    params["url"] = base_url + f"/search?lang={lang or ''}&content={aa_content}&ext={aa_ext}&sort={aa_sort}&q={q}"
    return params


def response(resp) -> List[Dict[str, Optional[str]]]:
    results: List[Dict[str, Optional[str]]] = []
    dom = html.fromstring(resp.text)

    for item in eval_xpath_list(dom, '//main//div[contains(@class, "h-[125]")]/a'):
        results.append(_get_result(item))

    # The rendering of the WEB page is very strange; except the first position
    # all other positions of Anna's result page are enclosed in SGML comments.
    # These comments are *uncommented* by some JS code, see query of class
    # '.js-scroll-hidden' in Anna's HTML template:
    # https://annas-software.org/AnnaArchivist/annas-archive/-/blob/main/allthethings/templates/macros/md5_list.html

    for item in eval_xpath_list(dom, '//main//div[contains(@class, "js-scroll-hidden")]'):
        item = html.fromstring(item.xpath('./comment()')[0].text)
        results.append(_get_result(item))

    return results


def _get_result(item):
    return {
        'template': 'paper.html',
        'url': base_url + item.xpath('./@href')[0],
        'title': extract_text(eval_xpath(item, './/h3/text()[1]')),
        'publisher': extract_text(eval_xpath(item, './/div[contains(@class, "text-sm")]')),
        'authors': [extract_text(eval_xpath(item, './/div[contains(@class, "italic")]'))],
        'content': extract_text(eval_xpath(item, './/div[contains(@class, "text-xs")]')),
        'img_src': item.xpath('.//img/@src')[0],
    }


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages and other search arguments from Anna's search form."""
    # pylint: disable=import-outside-toplevel

    import babel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762
    from searx.locales import language_tag

    engine_traits.all_locale = ''
    engine_traits.custom['content'] = []
    engine_traits.custom['ext'] = []
    engine_traits.custom['sort'] = []

    resp = get(base_url + '/search')
    if not resp.ok:  # type: ignore
        raise RuntimeError("Response from Anna's search page is not OK.")
    dom = html.fromstring(resp.text)  # type: ignore

    # supported language codes

    lang_map = {}
    for x in eval_xpath_list(dom, "//form//select[@name='lang']//option"):
        eng_lang = x.get("value")
        if eng_lang in ('', '_empty', 'nl-BE', 'und'):
            continue
        try:
            locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
        except babel.UnknownLocaleError:
            # silently ignore unknown languages
            # print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
            continue
        sxng_lang = language_tag(locale)
        conflict = engine_traits.languages.get(sxng_lang)
        if conflict:
            if conflict != eng_lang:
                print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
            continue
        engine_traits.languages[sxng_lang] = eng_lang

    for x in eval_xpath_list(dom, "//form//select[@name='content']//option"):
        engine_traits.custom['content'].append(x.get("value"))

    for x in eval_xpath_list(dom, "//form//select[@name='ext']//option"):
        engine_traits.custom['ext'].append(x.get("value"))

    for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"):
        engine_traits.custom['sort'].append(x.get("value"))
Executable
+62
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""APKMirror
"""

# pylint: disable=invalid-name

from urllib.parse import urlencode
from lxml import html

from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
)

about = {
    "website": 'https://www.apkmirror.com',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['files', 'apps']
paging = True
time_range_support = False

# search-url
base_url = 'https://www.apkmirror.com'
search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'


def request(query, params):
    params['url'] = search_url.format(
        pageno=params['pageno'],
        query=urlencode({'s': query}),
    )
    logger.debug("query_url --> %s", params['url'])
    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"):

        link = eval_xpath_getindex(result, './/h5/a', 0)

        url = base_url + link.attrib.get('href') + '#downloads'
        title = extract_text(link)
        img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0)
        res = {'url': url, 'title': title, 'img_src': img_src}

        results.append(res)

    return results
Executable
+57
@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
 Apple App Store
"""

from json import loads
from urllib.parse import urlencode
from dateutil.parser import parse

about = {
    "website": 'https://www.apple.com/app-store/',
    "wikidata_id": 'Q368215',
    "official_api_documentation": (
        'https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/'
        'iTuneSearchAPI/UnderstandingSearchResults.html#//apple_ref/doc/uid/TP40017632-CH8-SW1'
    ),
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['files', 'apps']
safesearch = True

search_url = 'https://itunes.apple.com/search?{query}'


def request(query, params):
    explicit = "Yes"

    if params['safesearch'] > 0:
        explicit = "No"

    params['url'] = search_url.format(query=urlencode({'term': query, 'media': 'software', 'explicit': explicit}))

    return params


def response(resp):
    results = []

    json_result = loads(resp.text)

    for result in json_result['results']:
        results.append(
            {
                'url': result['trackViewUrl'],
                'title': result['trackName'],
                'content': result['description'],
                'img_src': result['artworkUrl100'],
                'publishedDate': parse(result['currentVersionReleaseDate']),
                'author': result['sellerName'],
            }
        )

    return results
Executable
+113
@@ -0,0 +1,113 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Apple Maps"""

from json import loads
from time import time
from urllib.parse import urlencode

from searx.network import get as http_get
from searx.engines.openstreetmap import get_key_label

about = {
    "website": 'https://www.apple.com/maps/',
    "wikidata_id": 'Q276101',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

token = {'value': '', 'last_updated': None}

categories = ['map']
paging = False

search_url = "https://api.apple-mapkit.com/v1/search?{query}&mkjsVersion=5.72.53"


def obtain_token():
    update_time = time() - (time() % 1800)
    try:
        # use duckduckgo's mapkit token
        token_response = http_get('https://duckduckgo.com/local.js?get_mk_token=1', timeout=2.0)
        actual_token = http_get(
            'https://cdn.apple-mapkit.com/ma/bootstrap?apiVersion=2&mkjsVersion=5.72.53&poi=1',
            timeout=2.0,
            headers={'Authorization': 'Bearer ' + token_response.text},
        )
        token['value'] = loads(actual_token.text)['authInfo']['access_token']
        token['last_updated'] = update_time
    # pylint: disable=bare-except
    except:
        pass
    return token


def request(query, params):
    if time() - (token['last_updated'] or 0) > 1800:
        obtain_token()

    params['url'] = search_url.format(query=urlencode({'q': query, 'lang': params['language']}))

    params['headers'] = {'Authorization': 'Bearer ' + token['value']}

    return params


def response(resp):
    results = []

    resp_json = loads(resp.text)

    user_language = resp.search_params['language']

    for result in resp_json['results']:
        boundingbox = None
        if 'displayMapRegion' in result:
            box = result['displayMapRegion']
            boundingbox = [box['southLat'], box['northLat'], box['westLng'], box['eastLng']]

        links = []
        if 'telephone' in result:
            telephone = result['telephone']
            links.append(
                {
                    'label': get_key_label('phone', user_language),
                    'url': 'tel:' + telephone,
                    'url_label': telephone,
                }
            )
        if result.get('urls'):
            url = result['urls'][0]
            links.append(
                {
                    'label': get_key_label('website', user_language),
                    'url': url,
                    'url_label': url,
                }
            )

        results.append(
            {
                'template': 'map.html',
                'type': result.get('poiCategory'),
                'title': result['name'],
                'links': links,
                'latitude': result['center']['lat'],
                'longitude': result['center']['lng'],
                'url': result['placecardUrl'],
                'boundingbox': boundingbox,
                'geojson': {'type': 'Point', 'coordinates': [result['center']['lng'], result['center']['lat']]},
                'address': {
                    'name': result['name'],
                    'house_number': result.get('subThoroughfare'),
                    'road': result.get('thoroughfare'),
                    'locality': result.get('locality'),
                    'postcode': result.get('postCode'),
                    'country': result.get('country'),
                },
            }
        )

    return results
Executable
+152
@@ -0,0 +1,152 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
Arch Linux Wiki
~~~~~~~~~~~~~~~

This implementation does not use an official API: MediaWiki provides an API,
but the Arch Wiki blocks access to it.

"""

from typing import TYPE_CHECKING
from urllib.parse import urlencode, urljoin, urlparse
import lxml
import babel

from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
from searx.enginelib.traits import EngineTraits
from searx.locales import language_tag

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

traits: EngineTraits


about = {
    "website": 'https://wiki.archlinux.org/',
    "wikidata_id": 'Q101445877',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['it', 'software wikis']
paging = True
main_wiki = 'wiki.archlinux.org'


def request(query, params):

    sxng_lang = params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    title: str = traits.custom['title'].get(sxng_lang, 'Special:Search')  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'
    offset = (params['pageno'] - 1) * 20

    if netloc == main_wiki:
        eng_lang: str = traits.get_language(sxng_lang, 'English')  # type: ignore
        query += ' (' + eng_lang + ')'
    elif netloc == 'wiki.archlinuxcn.org':
        base_url = 'https://' + netloc + '/wzh/index.php?'

    args = {
        'search': query,
        'title': title,
        'limit': 20,
        'offset': offset,
        'profile': 'default',
    }

    params['url'] = base_url + urlencode(args)
    return params


def response(resp):

    results = []
    dom = lxml.html.fromstring(resp.text)  # type: ignore

    # get the base URL for the language in which the request was made
    sxng_lang = resp.search_params['searxng_locale'].split('-')[0]
    netloc: str = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki)  # type: ignore
    base_url = 'https://' + netloc + '/index.php?'

    for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'):
        link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0)
        content = extract_text(result.xpath('.//div[@class="searchresult"]'))
        results.append(
            {
                'url': urljoin(base_url, link.get('href')),  # type: ignore
                'title': extract_text(link),
                'content': content,
            }
        )

    return results


def fetch_traits(engine_traits: EngineTraits):
    """Fetch languages from the Arch Linux Wiki.  The location of the Wiki address
    of a language is mapped in a :py:obj:`custom field
    <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``).  Depending
    on the location, the ``title`` argument in the request is translated.

    .. code:: python

       "custom": {
         "wiki_netloc": {
           "de": "wiki.archlinux.de",
           # ...
           "zh": "wiki.archlinuxcn.org"
         }
         "title": {
           "de": "Spezial:Suche",
           # ...
           "zh": "Special:\u641c\u7d22"
         },
       },

    """
    # pylint: disable=import-outside-toplevel
    from searx.network import get  # see https://github.com/searxng/searxng/issues/762

    engine_traits.custom['wiki_netloc'] = {}
    engine_traits.custom['title'] = {}

    title_map = {
        'de': 'Spezial:Suche',
        'fa': 'ویژه:جستجو',
        'ja': '特別:検索',
        'zh': 'Special:搜索',
    }

    resp = get('https://wiki.archlinux.org/')
    if not resp.ok:  # type: ignore
        print("ERROR: response from wiki.archlinux.org is not OK.")

    dom = lxml.html.fromstring(resp.text)  # type: ignore
    for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"):

        sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-'))
        # zh_Hans --> zh
        sxng_tag = sxng_tag.split('_')[0]

        netloc = urlparse(a.get('href')).netloc
        if netloc != 'wiki.archlinux.org':
            title = title_map.get(sxng_tag)
            if not title:
                print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag))
                continue
            engine_traits.custom['wiki_netloc'][sxng_tag] = netloc
            engine_traits.custom['title'][sxng_tag] = title  # type: ignore

        eng_tag = extract_text(eval_xpath_list(a, ".//span"))
        engine_traits.languages[sxng_tag] = eng_tag  # type: ignore

    engine_traits.languages['en'] = 'English'
Executable
+69
@@ -0,0 +1,69 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""The Art Institute of Chicago

Explore thousands of artworks from The Art Institute of Chicago.

* https://artic.edu

"""

from json import loads
from urllib.parse import urlencode

about = {
    "website": 'https://www.artic.edu',
    "wikidata_id": 'Q239303',
    "official_api_documentation": 'http://api.artic.edu/docs/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['images']
paging = True
nb_per_page = 20

search_api = 'https://api.artic.edu/api/v1/artworks/search?'
image_api = 'https://www.artic.edu/iiif/2/'


def request(query, params):

    args = urlencode(
        {
            'q': query,
            'page': params['pageno'],
            'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
            'limit': nb_per_page,
        }
    )
    params['url'] = search_api + args

    logger.debug("query_url --> %s", params['url'])
    return params


def response(resp):

    results = []
    json_data = loads(resp.text)

    for result in json_data['data']:

        if not result['image_id']:
            continue

        results.append(
            {
                'url': 'https://artic.edu/artworks/%(id)s' % result,
                'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
                'content': result['medium_display'],
                'author': ', '.join(result['artist_titles']),
                'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
                'img_format': result['dimensions'],
                'template': 'images.html',
            }
        )

    return results
Executable
+109
@@ -0,0 +1,109 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 ArXiV (Scientific preprints)
"""

from lxml import etree
from lxml.etree import XPath
from datetime import datetime
from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": 'https://arxiv.org',
    "wikidata_id": 'Q118398',
    "official_api_documentation": 'https://arxiv.org/help/api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML-RSS',
}

categories = ['science', 'scientific publications']
paging = True

base_url = (
    'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}'
)

# engine dependent config
number_of_results = 10

# xpaths
arxiv_namespaces = {
    "atom": "http://www.w3.org/2005/Atom",
    "arxiv": "http://arxiv.org/schemas/atom",
}
xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)


def request(query, params):
    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    string_args = dict(query=query, offset=offset, number_of_results=number_of_results)

    params['url'] = base_url.format(**string_args)

    return params


def response(resp):
    results = []
    dom = etree.fromstring(resp.content)
    for entry in eval_xpath_list(dom, xpath_entry):
        title = eval_xpath_getindex(entry, xpath_title, 0).text

        url = eval_xpath_getindex(entry, xpath_id, 0).text
        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text

        authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]

        # doi
        doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
        doi = None if doi_element is None else doi_element.text

        # pdf
        pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
        pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')

        # journal
        journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
        journal = None if journal_element is None else journal_element.text

        # tags
        tag_elements = eval_xpath(entry, xpath_category)
        tags = [str(tag) for tag in tag_elements]

        # comments
        comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
        comments = None if comments_elements is None else comments_elements.text

        publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')

        res_dict = {
            'template': 'paper.html',
            'url': url,
            'title': title,
            'publishedDate': publishedDate,
            'content': abstract,
            'doi': doi,
            'authors': authors,
            'journal': journal,
            'tags': tags,
            'comments': comments,
            'pdf_url': pdf_url,
        }

        results.append(res_dict)

    return results
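As a worked example of the request() builder above: for the query "electron" on page 2 with the default number_of_results = 10, the offset is (2 - 1) * 10 = 10 and the engine requests https://export.arxiv.org/api/query?search_query=all:electron&start=10&max_results=10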
Executable
+95
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Bandcamp (Music)

@website     https://bandcamp.com/
@provide-api no
@results     HTML
@parse       url, title, content, publishedDate, iframe_src, thumbnail

"""

from urllib.parse import urlencode, urlparse, parse_qs
from dateutil.parser import parse as dateparse
from lxml import html

from searx.utils import (
    eval_xpath_getindex,
    eval_xpath_list,
    extract_text,
)

# about
about = {
    "website": 'https://bandcamp.com/',
    "wikidata_id": 'Q545966',
    "official_api_documentation": 'https://bandcamp.com/developer',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['music']
paging = True

base_url = "https://bandcamp.com/"
search_string = 'search?{query}&page={page}'
iframe_src = "https://bandcamp.com/EmbeddedPlayer/{type}={result_id}/size=large/bgcol=000/linkcol=fff/artwork=small"


def request(query, params):
    '''pre-request callback

    params<dict>:
      method  : POST/GET
      headers : {}
      data    : {}  # if method == POST
      url     : ''
      category: 'search category'
      pageno  : 1  # number of the requested page
    '''

    search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno'])
    params['url'] = base_url + search_path
    return params


def response(resp):
    '''post-response callback

    resp: requests response object
    '''
    results = []
    dom = html.fromstring(resp.text)

    for result in eval_xpath_list(dom, '//li[contains(@class, "searchresult")]'):

        link = eval_xpath_getindex(result, './/div[@class="itemurl"]/a', 0, default=None)
        if link is None:
            continue

        title = result.xpath('.//div[@class="heading"]/a/text()')
        content = result.xpath('.//div[@class="subhead"]/text()')
        new_result = {
            "url": extract_text(link),
            "title": extract_text(title),
            "content": extract_text(content),
        }

        date = eval_xpath_getindex(result, '//div[@class="released"]/text()', 0, default=None)
        if date:
            new_result["publishedDate"] = dateparse(date.replace("released ", ""))

        thumbnail = result.xpath('.//div[@class="art"]/img/@src')
        if thumbnail:
            new_result['img_src'] = thumbnail[0]

        result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
        itemtype = extract_text(result.xpath('.//div[@class="itemtype"]')).lower()
        if "album" == itemtype:
            new_result["iframe_src"] = iframe_src.format(type='album', result_id=result_id)
        elif "track" == itemtype:
            new_result["iframe_src"] = iframe_src.format(type='track', result_id=result_id)

        results.append(new_result)
    return results
Executable
+112
@@ -0,0 +1,112 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 BASE (Scholar publications)
"""

from urllib.parse import urlencode
from lxml import etree
from datetime import datetime
import re
from searx.utils import searx_useragent

# about
about = {
    "website": 'https://base-search.net',
    "wikidata_id": 'Q448335',
    "official_api_documentation": 'https://api.base-search.net/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'XML',
}

categories = ['science']

base_url = (
    'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'
    + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}'
)

# engine dependent config
paging = True
number_of_results = 10

# shortcuts for advanced search
shorcut_dict = {
    # user-friendly keywords
    'format:': 'dcformat:',
    'author:': 'dccreator:',
    'collection:': 'dccollection:',
    'hdate:': 'dchdate:',
    'contributor:': 'dccontributor:',
    'coverage:': 'dccoverage:',
    'date:': 'dcdate:',
    'abstract:': 'dcdescription:',
    'urls:': 'dcidentifier:',
    'language:': 'dclanguage:',
    'publisher:': 'dcpublisher:',
    'relation:': 'dcrelation:',
    'rights:': 'dcrights:',
    'source:': 'dcsource:',
    'subject:': 'dcsubject:',
    'title:': 'dctitle:',
    'type:': 'dcdctype:',
}


def request(query, params):
    # replace shortcuts with API advanced search keywords
    for key in shorcut_dict.keys():
        query = re.sub(key, shorcut_dict[key], query)

    # basic search
    offset = (params['pageno'] - 1) * number_of_results

    string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results)

    params['url'] = base_url.format(**string_args)

    params['headers']['User-Agent'] = searx_useragent()
    return params


def response(resp):
    results = []

    search_results = etree.XML(resp.content)

    for entry in search_results.xpath('./result/doc'):
        content = "No description available"

        date = datetime.now()  # needed in case no dcdate is available for an item
        for item in entry:
            if item.attrib["name"] == "dcdate":
                date = item.text

            elif item.attrib["name"] == "dctitle":
                title = item.text

            elif item.attrib["name"] == "dclink":
                url = item.text

            elif item.attrib["name"] == "dcdescription":
                content = item.text[:300]
                if len(item.text) > 300:
                    content += "..."

        # dates returned by the BASE API come in several formats
        publishedDate = None
        for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']:
            try:
                publishedDate = datetime.strptime(date, date_format)
                break
            except:
                pass

        if publishedDate is not None:
            res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
        else:
            res_dict = {'url': url, 'title': title, 'content': content}

        results.append(res_dict)

    return results
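The shortcut substitution in request() is a plain re.sub() over the raw query string, so a query such as "author:einstein title:relativity" is rewritten to "dccreator:einstein dctitle:relativity" before being URL-encoded into the query parameter.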
Executable
+337
@@ -0,0 +1,337 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This is the implementation of the Bing-WEB engine. Some of this
|
||||||
|
implementations are shared by other engines:
|
||||||
|
|
||||||
|
- :ref:`bing images engine`
|
||||||
|
- :ref:`bing news engine`
|
||||||
|
- :ref:`bing videos engine`
|
||||||
|
|
||||||
|
On the `preference page`_ Bing offers a lot of languages an regions (see section
|
||||||
|
'Search results languages' and 'Country/region'). However, the abundant choice
|
||||||
|
does not correspond to reality, where Bing has a full-text indexer only for a
|
||||||
|
limited number of languages. By example: you can select a language like Māori
|
||||||
|
but you never get a result in this language.
|
||||||
|
|
||||||
|
What comes a bit closer to the truth are the `search-APIs`_ but they don`t seem
|
||||||
|
to be completely correct either (if you take a closer look you will find some
|
||||||
|
inaccuracies there too):
|
||||||
|
|
||||||
|
- :py:obj:`searx.engines.bing.bing_traits_url`
|
||||||
|
- :py:obj:`searx.engines.bing_videos.bing_traits_url`
|
||||||
|
- :py:obj:`searx.engines.bing_images.bing_traits_url`
|
||||||
|
- :py:obj:`searx.engines.bing_news.bing_traits_url`
|
||||||
|
|
||||||
|
.. _preference page: https://www.bing.com/account/general
|
||||||
|
.. _search-APIs: https://learn.microsoft.com/en-us/bing/search-apis/
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=too-many-branches, invalid-name
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
import uuid
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
import babel
|
||||||
|
import babel.languages
|
||||||
|
|
||||||
|
from searx.utils import eval_xpath, extract_text, eval_xpath_list, eval_xpath_getindex
|
||||||
|
from searx.locales import language_tag, region_tag
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.bing.com',
|
||||||
|
"wikidata_id": 'Q182496',
|
||||||
|
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
"""Bing tries to guess user's language and territory from the HTTP
|
||||||
|
Accept-Language. Optional the user can select a search-language (can be
|
||||||
|
different to the UI language) and a region (market code)."""
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['general', 'web']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = True
|
||||||
|
safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # cookie: ADLT=STRICT
|
||||||
|
|
||||||
|
base_url = 'https://www.bing.com/search'
|
||||||
|
"""Bing (Web) search URL"""
|
||||||
|
|
||||||
|
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/reference/market-codes'
|
||||||
|
"""Bing (Web) search API description"""
|
||||||
|
|
||||||
|
|
||||||
|
def _get_offset_from_pageno(pageno):
|
||||||
|
return (pageno - 1) * 10 + 1
|
||||||
|
|
||||||
|
|
||||||
|
def set_bing_cookies(params, engine_language, engine_region, SID):
|
||||||
|
|
||||||
|
# set cookies
|
||||||
|
# -----------
|
||||||
|
|
||||||
|
params['cookies']['_EDGE_V'] = '1'
|
||||||
|
|
||||||
|
# _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
|
||||||
|
_EDGE_S = [
|
||||||
|
'F=1',
|
||||||
|
'SID=%s' % SID,
|
||||||
|
'mkt=%s' % engine_region.lower(),
|
||||||
|
'ui=%s' % engine_language.lower(),
|
||||||
|
]
|
||||||
|
params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
|
||||||
|
logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])
|
||||||
|
|
||||||
|
# "_EDGE_CD": "m=zh-tw",
|
||||||
|
|
||||||
|
_EDGE_CD = [ # pylint: disable=invalid-name
|
||||||
|
'm=%s' % engine_region.lower(), # search region: zh-cn
|
||||||
|
'u=%s' % engine_language.lower(), # UI: en-us
|
||||||
|
]
|
||||||
|
|
||||||
|
params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
|
||||||
|
logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])
|
||||||
|
|
||||||
|
SRCHHPGUSR = [ # pylint: disable=invalid-name
|
||||||
|
'SRCHLANG=%s' % engine_language,
|
||||||
|
# Trying to set the ADLT cookie here seems to have no effect; I assume
|
||||||
|
# some age verification by a cookie (and/or session ID) is needed
|
||||||
|
# to disable SafeSearch.
|
||||||
|
'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
|
||||||
|
]
|
||||||
|
params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
|
||||||
|
logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
|
||||||
|
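For reference, a minimal sketch of the cookie values the function above assembles, assuming the hypothetical market ``de-DE``, UI language ``de`` and safe-search level 1 (the ``SID`` is a random UUID in practice and just a placeholder here):

.. code:: python

   # Illustrative only -- mirrors the assembly done in set_bing_cookies().
   params = {'cookies': {}, 'safesearch': 1}
   SID = '0' * 32                       # placeholder, the engine uses uuid.uuid1()
   engine_region, engine_language = 'de-DE', 'de'

   params['cookies']['_EDGE_V'] = '1'
   params['cookies']['_EDGE_S'] = '&'.join(
       ['F=1', 'SID=%s' % SID, 'mkt=%s' % engine_region.lower(), 'ui=%s' % engine_language.lower()]
   )
   params['cookies']['_EDGE_CD'] = 'm=%s&u=%s;' % (engine_region.lower(), engine_language.lower())
   params['cookies']['SRCHHPGUSR'] = 'SRCHLANG=%s&ADLT=%s' % (engine_language, 'DEMOTE')

   print(params['cookies']['_EDGE_S'])
   # F=1&SID=00000000000000000000000000000000&mkt=de-de&ui=de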
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Assemble a Bing-Web request."""
|
||||||
|
|
||||||
|
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
|
||||||
|
engine_language = traits.get_language(params['searxng_locale'], 'en')
|
||||||
|
|
||||||
|
SID = uuid.uuid1().hex.upper()
|
||||||
|
CVID = uuid.uuid1().hex.upper()
|
||||||
|
|
||||||
|
set_bing_cookies(params, engine_language, engine_region, SID)
|
||||||
|
|
||||||
|
# build URL query
|
||||||
|
# ---------------
|
||||||
|
|
||||||
|
# query term
|
||||||
|
page = int(params.get('pageno', 1))
|
||||||
|
query_params = {
|
||||||
|
# fmt: off
|
||||||
|
'q': query,
|
||||||
|
'pq': query,
|
||||||
|
'cvid': CVID,
|
||||||
|
'qs': 'n',
|
||||||
|
'sp': '-1'
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
# page
|
||||||
|
if page > 1:
|
||||||
|
referer = base_url + '?' + urlencode(query_params)
|
||||||
|
params['headers']['Referer'] = referer
|
||||||
|
logger.debug("headers.Referer --> %s", referer)
|
||||||
|
|
||||||
|
query_params['first'] = _get_offset_from_pageno(page)
|
||||||
|
|
||||||
|
if page == 2:
|
||||||
|
query_params['FORM'] = 'PERE'
|
||||||
|
elif page > 2:
|
||||||
|
query_params['FORM'] = 'PERE%s' % (page - 2)
|
||||||
|
|
||||||
|
filters = ''
|
||||||
|
if params['time_range']:
|
||||||
|
query_params['filt'] = 'custom'
|
||||||
|
|
||||||
|
if params['time_range'] == 'day':
|
||||||
|
filters = 'ex1:"ez1"'
|
||||||
|
elif params['time_range'] == 'week':
|
||||||
|
filters = 'ex1:"ez2"'
|
||||||
|
elif params['time_range'] == 'month':
|
||||||
|
filters = 'ex1:"ez3"'
|
||||||
|
elif params['time_range'] == 'year':
|
||||||
|
epoch_1970 = datetime.date(1970, 1, 1)
|
||||||
|
today_no = (datetime.date.today() - epoch_1970).days
|
||||||
|
filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
|
||||||
|
|
||||||
|
params['url'] = base_url + '?' + urlencode(query_params)
|
||||||
|
if filters:
|
||||||
|
params['url'] = params['url'] + '&filters=' + filters
|
||||||
|
return params
|
||||||
|
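Assuming a query ``foo`` on page 2 without a time range, the URL assembled above would look roughly like this (the ``cvid`` is a random UUID in practice and is shortened to a placeholder here):

.. code:: python

   from urllib.parse import urlencode

   query_params = {'q': 'foo', 'pq': 'foo', 'cvid': 'ABCDEF', 'qs': 'n', 'sp': '-1'}
   query_params['first'] = (2 - 1) * 10 + 1   # offset of page 2
   query_params['FORM'] = 'PERE'              # page 2 --> FORM=PERE
   print('https://www.bing.com/search?' + urlencode(query_params))
   # https://www.bing.com/search?q=foo&pq=foo&cvid=ABCDEF&qs=n&sp=-1&first=11&FORM=PERE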
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
# pylint: disable=too-many-locals,import-outside-toplevel
|
||||||
|
|
||||||
|
from searx.network import Request, multi_requests # see https://github.com/searxng/searxng/issues/762
|
||||||
|
|
||||||
|
results = []
|
||||||
|
result_len = 0
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
|
||||||
|
url_to_resolve = []
|
||||||
|
url_to_resolve_index = []
|
||||||
|
i = 0
|
||||||
|
for result in eval_xpath_list(dom, '//ol[@id="b_results"]/li[contains(@class, "b_algo")]'):
|
||||||
|
|
||||||
|
link = eval_xpath_getindex(result, './/h2/a', 0, None)
|
||||||
|
if link is None:
|
||||||
|
continue
|
||||||
|
url = link.attrib.get('href')
|
||||||
|
title = extract_text(link)
|
||||||
|
|
||||||
|
content = eval_xpath(result, '(.//p)[1]')
|
||||||
|
for p in content:
|
||||||
|
# Make sure that the element is free of <a href> links
|
||||||
|
for e in p.xpath('.//a'):
|
||||||
|
e.getparent().remove(e)
|
||||||
|
content = extract_text(content)
|
||||||
|
|
||||||
|
# get the real URL either by using the URL shown to the user or by following the Bing redirect URL
|
||||||
|
if url.startswith('https://www.bing.com/ck/a?'):
|
||||||
|
url_cite = extract_text(eval_xpath(result, './/div[@class="b_attribution"]/cite'))
|
||||||
|
# Bing can shorten the URL either at the end or in the middle of the string
|
||||||
|
if (
|
||||||
|
url_cite
|
||||||
|
and url_cite.startswith('https://')
|
||||||
|
and '…' not in url_cite
|
||||||
|
and '...' not in url_cite
|
||||||
|
and '›' not in url_cite
|
||||||
|
):
|
||||||
|
# no need for an additional HTTP request
|
||||||
|
url = url_cite
|
||||||
|
else:
|
||||||
|
# resolve the URL with an additional HTTP request
|
||||||
|
url_to_resolve.append(url.replace('&ntb=1', '&ntb=F'))
|
||||||
|
url_to_resolve_index.append(i)
|
||||||
|
url = None  # remove the result if the HTTP Bing redirect raises an exception
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append({'url': url, 'title': title, 'content': content})
|
||||||
|
# increment result pointer for the next iteration in this loop
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
# resolve all Bing redirections in parallel
|
||||||
|
request_list = [
|
||||||
|
Request.get(u, allow_redirects=False, headers=resp.search_params['headers']) for u in url_to_resolve
|
||||||
|
]
|
||||||
|
response_list = multi_requests(request_list)
|
||||||
|
for i, redirect_response in enumerate(response_list):
|
||||||
|
if not isinstance(redirect_response, Exception):
|
||||||
|
results[url_to_resolve_index[i]]['url'] = redirect_response.headers['location']
|
||||||
|
|
||||||
|
# get number_of_results
|
||||||
|
try:
|
||||||
|
result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()'))
|
||||||
|
if "-" in result_len_container:
|
||||||
|
|
||||||
|
# Remove the "from-to" part of a paginated request ...
|
||||||
|
result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :]
|
||||||
|
|
||||||
|
result_len_container = re.sub('[^0-9]', '', result_len_container)
|
||||||
|
|
||||||
|
if len(result_len_container) > 0:
|
||||||
|
result_len = int(result_len_container)
|
||||||
|
|
||||||
|
except Exception as e: # pylint: disable=broad-except
|
||||||
|
logger.debug('result error :\n%s', e)
|
||||||
|
|
||||||
|
if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
|
||||||
|
return []
|
||||||
|
|
||||||
|
results.append({'number_of_results': result_len})
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch languages and regions from Bing-Web."""
|
||||||
|
|
||||||
|
xpath_market_codes = '//table[1]/tbody/tr/td[3]'
|
||||||
|
# xpath_country_codes = '//table[2]/tbody/tr/td[2]'
|
||||||
|
xpath_language_codes = '//table[3]/tbody/tr/td[2]'
|
||||||
|
|
||||||
|
_fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_traits(engine_traits: EngineTraits, url: str, xpath_language_codes: str, xpath_market_codes: str):
|
||||||
|
# pylint: disable=too-many-locals,import-outside-toplevel
|
||||||
|
|
||||||
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
|
|
||||||
|
# insert alias to map from a language (zh) to a language + script (zh_Hans)
|
||||||
|
engine_traits.languages['zh'] = 'zh-hans'
|
||||||
|
|
||||||
|
resp = get(url)
|
||||||
|
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
print("ERROR: response from peertube is not OK.")
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text) # type: ignore
|
||||||
|
|
||||||
|
map_lang = {'jp': 'ja'}
|
||||||
|
for td in eval_xpath(dom, xpath_language_codes):
|
||||||
|
eng_lang = td.text
|
||||||
|
|
||||||
|
if eng_lang in ('en-gb', 'pt-br'):
|
||||||
|
# language 'en' is already in the list and a language 'en-gb' can't
|
||||||
|
# be handled in SearXNG, same with pt-br which is covered by pt-pt.
|
||||||
|
continue
|
||||||
|
|
||||||
|
babel_lang = map_lang.get(eng_lang, eng_lang).replace('-', '_')
|
||||||
|
try:
|
||||||
|
sxng_tag = language_tag(babel.Locale.parse(babel_lang))
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: language (%s) is unknown by babel" % (eng_lang))
|
||||||
|
continue
|
||||||
|
conflict = engine_traits.languages.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_lang:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
|
||||||
|
continue
|
||||||
|
engine_traits.languages[sxng_tag] = eng_lang
|
||||||
|
|
||||||
|
map_region = {
|
||||||
|
'en-ID': 'id_ID',
|
||||||
|
'no-NO': 'nb_NO',
|
||||||
|
}
|
||||||
|
|
||||||
|
for td in eval_xpath(dom, xpath_market_codes):
|
||||||
|
eng_region = td.text
|
||||||
|
babel_region = map_region.get(eng_region, eng_region).replace('-', '_')
|
||||||
|
|
||||||
|
if eng_region == 'en-WW':
|
||||||
|
engine_traits.all_locale = eng_region
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
sxng_tag = region_tag(babel.Locale.parse(babel_region))
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: region (%s) is unknown by babel" % (eng_region))
|
||||||
|
continue
|
||||||
|
conflict = engine_traits.regions.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_region:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_region))
|
||||||
|
continue
|
||||||
|
engine_traits.regions[sxng_tag] = eng_region
|
||||||
Executable
+132
@@ -0,0 +1,132 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Bing-Images: description see :py:obj:`searx.engines.bing`.
|
||||||
|
"""
|
||||||
|
# pylint: disable=invalid-name
|
||||||
|
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
import uuid
|
||||||
|
import json
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
from searx.engines.bing import (
|
||||||
|
set_bing_cookies,
|
||||||
|
_fetch_traits,
|
||||||
|
)
|
||||||
|
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.bing.com/images',
|
||||||
|
"wikidata_id": 'Q182496',
|
||||||
|
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['images', 'web']
|
||||||
|
paging = True
|
||||||
|
safesearch = True
|
||||||
|
time_range_support = True
|
||||||
|
|
||||||
|
base_url = 'https://www.bing.com/images/async'
|
||||||
|
"""Bing (Images) search URL"""
|
||||||
|
|
||||||
|
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-image-search/reference/market-codes'
|
||||||
|
"""Bing (Images) search API description"""
|
||||||
|
|
||||||
|
time_map = {
|
||||||
|
# fmt: off
|
||||||
|
'day': 60 * 24,
|
||||||
|
'week': 60 * 24 * 7,
|
||||||
|
'month': 60 * 24 * 31,
|
||||||
|
'year': 60 * 24 * 365,
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Assemble a Bing-Image request."""
|
||||||
|
|
||||||
|
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
|
||||||
|
engine_language = traits.get_language(params['searxng_locale'], 'en')
|
||||||
|
|
||||||
|
SID = uuid.uuid1().hex.upper()
|
||||||
|
set_bing_cookies(params, engine_language, engine_region, SID)
|
||||||
|
|
||||||
|
# build URL query
|
||||||
|
# - example: https://www.bing.com/images/async?q=foo&first=155&count=35
|
||||||
|
|
||||||
|
query_params = {
|
||||||
|
# fmt: off
|
||||||
|
'q': query,
|
||||||
|
'async' : 'content',
|
||||||
|
# to simplify the page count, let's use the default of 35 images per page
|
||||||
|
'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
|
||||||
|
'count' : 35,
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
# time range
|
||||||
|
# - example: one year (525600 minutes) 'qft=+filterui:age-lt525600'
|
||||||
|
|
||||||
|
if params['time_range']:
|
||||||
|
query_params['qft'] = 'filterui:age-lt%s' % time_map[params['time_range']]
|
||||||
|
|
||||||
|
params['url'] = base_url + '?' + urlencode(query_params)
|
||||||
|
|
||||||
|
return params
|
||||||
|
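Assuming a query ``foo`` on page 2 with the time range ``week`` (10080 minutes), the request above builds a URL like:

.. code:: python

   from urllib.parse import urlencode

   q = {'q': 'foo', 'async': 'content', 'first': (2 - 1) * 35 + 1, 'count': 35,
        'qft': 'filterui:age-lt10080'}
   print('https://www.bing.com/images/async?' + urlencode(q))
   # https://www.bing.com/images/async?q=foo&async=content&first=36&count=35&qft=filterui%3Aage-lt10080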
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from Bing-Images"""
|
||||||
|
|
||||||
|
results = []
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for result in dom.xpath('//ul[contains(@class, "dgControl_list")]/li'):
|
||||||
|
|
||||||
|
metadata = result.xpath('.//a[@class="iusc"]/@m')
|
||||||
|
if not metadata:
|
||||||
|
continue
|
||||||
|
|
||||||
|
metadata = json.loads(result.xpath('.//a[@class="iusc"]/@m')[0])
|
||||||
|
title = ' '.join(result.xpath('.//div[@class="infnmpt"]//a/text()')).strip()
|
||||||
|
img_format = ' '.join(result.xpath('.//div[@class="imgpt"]/div/span/text()')).strip()
|
||||||
|
source = ' '.join(result.xpath('.//div[@class="imgpt"]//div[@class="lnkw"]//a/text()')).strip()
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'template': 'images.html',
|
||||||
|
'url': metadata['purl'],
|
||||||
|
'thumbnail_src': metadata['turl'],
|
||||||
|
'img_src': metadata['murl'],
|
||||||
|
'content': metadata['desc'],
|
||||||
|
'title': title,
|
||||||
|
'source': source,
|
||||||
|
'img_format': img_format,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch languages and regions from Bing-News."""
|
||||||
|
|
||||||
|
xpath_market_codes = '//table[1]/tbody/tr/td[3]'
|
||||||
|
# xpath_country_codes = '//table[2]/tbody/tr/td[2]'
|
||||||
|
xpath_language_codes = '//table[3]/tbody/tr/td[2]'
|
||||||
|
|
||||||
|
_fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
|
||||||
Executable
+150
@@ -0,0 +1,150 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Bing-News: description see :py:obj:`searx.engines.bing`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# pylint: disable=invalid-name
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
import uuid
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
from searx.engines.bing import (
|
||||||
|
set_bing_cookies,
|
||||||
|
_fetch_traits,
|
||||||
|
)
|
||||||
|
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.bing.com/news',
|
||||||
|
"wikidata_id": 'Q2878637',
|
||||||
|
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'RSS',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['news']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
time_map = {
|
||||||
|
'day': '4',
|
||||||
|
'week': '8',
|
||||||
|
'month': '9',
|
||||||
|
}
|
||||||
|
"""A string '4' means *last hour*. We use *last hour* for ``day`` here since the
|
||||||
|
difference between *last day* and *last week* in the result list is just marginal.
|
||||||
|
"""
|
||||||
|
|
||||||
|
base_url = 'https://www.bing.com/news/infinitescrollajax'
|
||||||
|
"""Bing (News) search URL"""
|
||||||
|
|
||||||
|
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-news-search/reference/market-codes'
|
||||||
|
"""Bing (News) search API description"""
|
||||||
|
|
||||||
|
mkt_alias = {
|
||||||
|
'zh': 'en-WW',
|
||||||
|
'zh-CN': 'en-WW',
|
||||||
|
}
|
||||||
|
"""Bing News has an official market code 'zh-CN' but we won't get a result with
|
||||||
|
this market code. For 'zh' and 'zh-CN' it is better to use the *Worldwide aggregate*
|
||||||
|
market code (en-WW).
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Assemble a Bing-News request."""
|
||||||
|
|
||||||
|
sxng_locale = params['searxng_locale']
|
||||||
|
engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
|
||||||
|
engine_language = traits.get_language(sxng_locale, 'en')
|
||||||
|
|
||||||
|
SID = uuid.uuid1().hex.upper()
|
||||||
|
set_bing_cookies(params, engine_language, engine_region, SID)
|
||||||
|
|
||||||
|
# build URL query
|
||||||
|
#
|
||||||
|
# example: https://www.bing.com/news/infinitescrollajax?q=london&first=1
|
||||||
|
|
||||||
|
query_params = {
|
||||||
|
# fmt: off
|
||||||
|
'q': query,
|
||||||
|
'InfiniteScroll': 1,
|
||||||
|
# to simplify the page count, let's use the default of 10 results per page
|
||||||
|
'first' : (int(params.get('pageno', 1)) - 1) * 10 + 1,
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
if params['time_range']:
|
||||||
|
# qft=interval:"7"
|
||||||
|
query_params['qft'] = 'qft=interval="%s"' % time_map.get(params['time_range'], '9')
|
||||||
|
|
||||||
|
params['url'] = base_url + '?' + urlencode(query_params)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from Bing-Video"""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
if not resp.ok or not resp.text:
|
||||||
|
return results
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for newsitem in dom.xpath('//div[contains(@class, "newsitem")]'):
|
||||||
|
|
||||||
|
url = newsitem.xpath('./@url')[0]
|
||||||
|
title = ' '.join(newsitem.xpath('.//div[@class="caption"]//a[@class="title"]/text()')).strip()
|
||||||
|
content = ' '.join(newsitem.xpath('.//div[@class="snippet"]/text()')).strip()
|
||||||
|
thumbnail = None
|
||||||
|
author = newsitem.xpath('./@data-author')[0]
|
||||||
|
metadata = ' '.join(newsitem.xpath('.//div[@class="source"]/span/text()')).strip()
|
||||||
|
|
||||||
|
img_src = newsitem.xpath('.//a[@class="imagelink"]//img/@src')
|
||||||
|
if img_src:
|
||||||
|
thumbnail = 'https://www.bing.com/' + img_src[0]
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'content': content,
|
||||||
|
'img_src': thumbnail,
|
||||||
|
'author': author,
|
||||||
|
'metadata': metadata,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch languages and regions from Bing-News.
|
||||||
|
|
||||||
|
The :py:obj:`description <searx.engines.bing_news.bing_traits_url>` of the
|
||||||
|
first table says *"query parameter when calling the Video Search API."*
|
||||||
|
... that's why I use the 4th table "News Category API markets" for the
|
||||||
|
``xpath_market_codes``.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
xpath_market_codes = '//table[4]/tbody/tr/td[3]'
|
||||||
|
# xpath_country_codes = '//table[2]/tbody/tr/td[2]'
|
||||||
|
xpath_language_codes = '//table[3]/tbody/tr/td[2]'
|
||||||
|
|
||||||
|
_fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
|
||||||
Executable
+128
@@ -0,0 +1,128 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Bing-Videos: description see :py:obj:`searx.engines.bing`.
|
||||||
|
"""
|
||||||
|
# pylint: disable=invalid-name
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
import uuid
|
||||||
|
import json
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
from searx.engines.bing import (
|
||||||
|
set_bing_cookies,
|
||||||
|
_fetch_traits,
|
||||||
|
)
|
||||||
|
from searx.engines.bing import send_accept_language_header # pylint: disable=unused-import
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.bing.com/videos',
|
||||||
|
"wikidata_id": 'Q4914152',
|
||||||
|
"official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['videos', 'web']
|
||||||
|
paging = True
|
||||||
|
safesearch = True
|
||||||
|
time_range_support = True
|
||||||
|
|
||||||
|
base_url = 'https://www.bing.com/videos/asyncv2'
|
||||||
|
"""Bing (Videos) async search URL."""
|
||||||
|
|
||||||
|
bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-video-search/reference/market-codes'
|
||||||
|
"""Bing (Video) search API description"""
|
||||||
|
|
||||||
|
time_map = {
|
||||||
|
# fmt: off
|
||||||
|
'day': 60 * 24,
|
||||||
|
'week': 60 * 24 * 7,
|
||||||
|
'month': 60 * 24 * 31,
|
||||||
|
'year': 60 * 24 * 365,
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Assemble a Bing-Video request."""
|
||||||
|
|
||||||
|
engine_region = traits.get_region(params['searxng_locale'], 'en-US')
|
||||||
|
engine_language = traits.get_language(params['searxng_locale'], 'en')
|
||||||
|
|
||||||
|
SID = uuid.uuid1().hex.upper()
|
||||||
|
set_bing_cookies(params, engine_language, engine_region, SID)
|
||||||
|
|
||||||
|
# build URL query
|
||||||
|
#
|
||||||
|
# example: https://www.bing.com/videos/asyncv2?q=foo&async=content&first=1&count=35
|
||||||
|
|
||||||
|
query_params = {
|
||||||
|
# fmt: off
|
||||||
|
'q': query,
|
||||||
|
'async' : 'content',
|
||||||
|
# to simplify the page count, let's use the default of 35 results per page
|
||||||
|
'first' : (int(params.get('pageno', 1)) - 1) * 35 + 1,
|
||||||
|
'count' : 35,
|
||||||
|
# fmt: on
|
||||||
|
}
|
||||||
|
|
||||||
|
# time range
|
||||||
|
#
|
||||||
|
# example: one week (10080 minutes) '&qft= filterui:videoage-lt10080' '&form=VRFLTR'
|
||||||
|
|
||||||
|
if params['time_range']:
|
||||||
|
query_params['form'] = 'VRFLTR'
|
||||||
|
query_params['qft'] = ' filterui:videoage-lt%s' % time_map[params['time_range']]
|
||||||
|
|
||||||
|
params['url'] = base_url + '?' + urlencode(query_params)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from Bing-Video"""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for result in dom.xpath('//div[@class="dg_u"]//div[contains(@id, "mc_vtvc_video")]'):
|
||||||
|
metadata = json.loads(result.xpath('.//div[@class="vrhdata"]/@vrhm')[0])
|
||||||
|
info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip()
|
||||||
|
content = '{0} - {1}'.format(metadata['du'], info)
|
||||||
|
thumbnail = result.xpath('.//div[contains(@class, "mc_vtvc_th")]//img/@src')[0]
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': metadata['murl'],
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'title': metadata.get('vt', ''),
|
||||||
|
'content': content,
|
||||||
|
'template': 'videos.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch languages and regions from Bing-Videos."""
|
||||||
|
|
||||||
|
xpath_market_codes = '//table[1]/tbody/tr/td[3]'
|
||||||
|
# xpath_country_codes = '//table[2]/tbody/tr/td[2]'
|
||||||
|
xpath_language_codes = '//table[3]/tbody/tr/td[2]'
|
||||||
|
|
||||||
|
_fetch_traits(engine_traits, bing_traits_url, xpath_language_codes, xpath_market_codes)
|
||||||
Executable
+419
@@ -0,0 +1,419 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Brave supports the categories listed in :py:obj:`brave_category` (General,
|
||||||
|
news, videos, images). The support of :py:obj:`paging` and :py:obj:`time range
|
||||||
|
<time_range_support>` is limited (see remarks).
|
||||||
|
|
||||||
|
Configured ``brave`` engines:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: brave
|
||||||
|
engine: brave
|
||||||
|
...
|
||||||
|
brave_category: search
|
||||||
|
time_range_support: true
|
||||||
|
paging: true
|
||||||
|
|
||||||
|
- name: brave.images
|
||||||
|
engine: brave
|
||||||
|
...
|
||||||
|
brave_category: images
|
||||||
|
|
||||||
|
- name: brave.videos
|
||||||
|
engine: brave
|
||||||
|
...
|
||||||
|
brave_category: videos
|
||||||
|
|
||||||
|
- name: brave.news
|
||||||
|
engine: brave
|
||||||
|
...
|
||||||
|
brave_category: news
|
||||||
|
|
||||||
|
|
||||||
|
.. _brave regions:
|
||||||
|
|
||||||
|
Brave regions
|
||||||
|
=============
|
||||||
|
|
||||||
|
Brave uses two-digit tags for the regions like ``ca`` while SearXNG deals with
|
||||||
|
locales. To get a mapping, all *official de-facto* languages of the Brave
|
||||||
|
region are mapped to regions in SearXNG (see :py:obj:`babel
|
||||||
|
<babel.languages.get_official_languages>`):
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
"regions": {
|
||||||
|
..
|
||||||
|
"en-CA": "ca",
|
||||||
|
"fr-CA": "ca",
|
||||||
|
..
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
The language (aka region) support of Brave's index is limited to very basic
|
||||||
|
languages. The search results for languages like Chinese or Arabic are of
|
||||||
|
low quality.
|
||||||
|
|
||||||
|
|
||||||
|
.. _brave languages:
|
||||||
|
|
||||||
|
Brave languages
|
||||||
|
===============
|
||||||
|
|
||||||
|
Brave's language support is limited to the UI (menus, locale-specific notations,
|
||||||
|
etc). Brave's index only seems to support a locale, but it does not seem to
|
||||||
|
support any languages in its index. The choice of available languages is very
|
||||||
|
small (and it's not clear to me what the difference in the UI is when switching
|
||||||
|
from en-us to en-ca or en-gb).
|
||||||
|
|
||||||
|
In the :py:obj:`EngineTraits object <searx.enginelib.traits.EngineTraits>` the
|
||||||
|
UI languages are stored in a custom field named ``ui_lang``:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
"custom": {
|
||||||
|
"ui_lang": {
|
||||||
|
"ca": "ca",
|
||||||
|
"de-DE": "de-de",
|
||||||
|
"en-CA": "en-ca",
|
||||||
|
"en-GB": "en-gb",
|
||||||
|
"en-US": "en-us",
|
||||||
|
"es": "es",
|
||||||
|
"fr-CA": "fr-ca",
|
||||||
|
"fr-FR": "fr-fr",
|
||||||
|
"ja-JP": "ja-jp",
|
||||||
|
"pt-BR": "pt-br",
|
||||||
|
"sq-AL": "sq-al"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urllib.parse import (
|
||||||
|
urlencode,
|
||||||
|
urlparse,
|
||||||
|
parse_qs,
|
||||||
|
)
|
||||||
|
|
||||||
|
import chompjs
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx import locales
|
||||||
|
from searx.utils import (
|
||||||
|
extract_text,
|
||||||
|
eval_xpath_list,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
)
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://search.brave.com/',
|
||||||
|
"wikidata_id": 'Q22906900',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
base_url = "https://search.brave.com/"
|
||||||
|
categories = []
|
||||||
|
brave_category = 'search'
|
||||||
|
"""Brave supports common web-search, video search, image and video search.
|
||||||
|
|
||||||
|
- ``search``: Common WEB search
|
||||||
|
- ``videos``: search for videos
|
||||||
|
- ``images``: search for images
|
||||||
|
- ``news``: search for news
|
||||||
|
"""
|
||||||
|
|
||||||
|
brave_spellcheck = False
|
||||||
|
"""Brave supports some kind of spell checking. When activated, Brave tries to
|
||||||
|
fix typos, e.g. it searches for ``food`` when the user queries for ``fooh``. In
|
||||||
|
the UI of Brave the user gets warned about this; since we cannot warn the user
|
||||||
|
in SearXNG, spell checking is disabled by default.
|
||||||
|
"""
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
paging = False
|
||||||
|
"""Brave only supports paging in :py:obj:`brave_category` ``search`` (UI
|
||||||
|
category All)."""
|
||||||
|
|
||||||
|
safesearch = True
|
||||||
|
safesearch_map = {2: 'strict', 1: 'moderate', 0: 'off'} # cookie: safesearch=off
|
||||||
|
|
||||||
|
time_range_support = False
|
||||||
|
"""Brave only supports time-range in :py:obj:`brave_category` ``search`` (UI
|
||||||
|
category All)."""
|
||||||
|
|
||||||
|
time_range_map = {
|
||||||
|
'day': 'pd',
|
||||||
|
'week': 'pw',
|
||||||
|
'month': 'pm',
|
||||||
|
'year': 'py',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
# Don't accept br encoding / see https://github.com/searxng/searxng/pull/1787
|
||||||
|
params['headers']['Accept-Encoding'] = 'gzip, deflate'
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'q': query,
|
||||||
|
}
|
||||||
|
if brave_spellcheck:
|
||||||
|
args['spellcheck'] = '1'
|
||||||
|
|
||||||
|
if brave_category == 'search':
|
||||||
|
if params.get('pageno', 1) - 1:
|
||||||
|
args['offset'] = params.get('pageno', 1) - 1
|
||||||
|
if time_range_map.get(params['time_range']):
|
||||||
|
args['tf'] = time_range_map.get(params['time_range'])
|
||||||
|
|
||||||
|
params["url"] = f"{base_url}{brave_category}?{urlencode(args)}"
|
||||||
|
|
||||||
|
# set properties in the cookies
|
||||||
|
|
||||||
|
params['cookies']['safesearch'] = safesearch_map.get(params['safesearch'], 'off')
|
||||||
|
# useLocation is IP based; we use the cookie 'country' for the region
|
||||||
|
params['cookies']['useLocation'] = '0'
|
||||||
|
params['cookies']['summarizer'] = '0'
|
||||||
|
|
||||||
|
engine_region = traits.get_region(params['searxng_locale'], 'all')
|
||||||
|
params['cookies']['country'] = engine_region.split('-')[-1].lower() # type: ignore
|
||||||
|
|
||||||
|
ui_lang = locales.get_engine_locale(params['searxng_locale'], traits.custom["ui_lang"], 'en-us')
|
||||||
|
params['cookies']['ui_lang'] = ui_lang
|
||||||
|
|
||||||
|
logger.debug("cookies %s", params['cookies'])
|
||||||
|
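A sketch of the cookies the request above ends up sending for a hypothetical ``fr-FR`` locale (assuming ``fr-FR`` is present in the fetched traits; the real values depend on the EngineTraits data):

.. code:: python

   # Illustrative only -- the real values come from the EngineTraits object.
   cookies = {
       'safesearch': 'moderate',  # safesearch_map[1]
       'useLocation': '0',        # never use IP based geolocation
       'summarizer': '0',
       'country': 'fr',           # region 'fr-FR' --> cookie country=fr
       'ui_lang': 'fr-fr',        # from traits.custom['ui_lang']
   }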
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
|
||||||
|
if brave_category == 'search':
|
||||||
|
return _parse_search(resp)
|
||||||
|
|
||||||
|
datastr = ""
|
||||||
|
for line in resp.text.split("\n"):
|
||||||
|
if "const data = " in line:
|
||||||
|
datastr = line.replace("const data = ", "").strip()[:-1]
|
||||||
|
break
|
||||||
|
|
||||||
|
json_data = chompjs.parse_js_object(datastr)
|
||||||
|
json_resp = json_data[1]['data']['body']['response']
|
||||||
|
|
||||||
|
if brave_category == 'news':
|
||||||
|
json_resp = json_resp['news']
|
||||||
|
return _parse_news(json_resp)
|
||||||
|
|
||||||
|
if brave_category == 'images':
|
||||||
|
return _parse_images(json_resp)
|
||||||
|
if brave_category == 'videos':
|
||||||
|
return _parse_videos(json_resp)
|
||||||
|
|
||||||
|
raise ValueError(f"Unsupported brave category: {brave_category}")
|
||||||
|
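A minimal sketch of the ``const data =`` extraction used above, with a shortened, made-up JavaScript literal standing in for Brave's embedded data (chompjs parses JS object literals, including unquoted keys):

.. code:: python

   import chompjs

   line = 'const data = [{v: 1}, {data: {body: {response: {news: {results: []}}}}}];'
   datastr = line.replace("const data = ", "").strip()[:-1]   # drop the trailing ';'
   json_data = chompjs.parse_js_object(datastr)
   print(json_data[1]['data']['body']['response']['news']['results'])   # []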
|
||||||
|
|
||||||
|
def _parse_search(resp):
|
||||||
|
|
||||||
|
result_list = []
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
answer_tag = eval_xpath_getindex(dom, '//div[@class="answer"]', 0, default=None)
|
||||||
|
if answer_tag:
|
||||||
|
result_list.append({'answer': extract_text(answer_tag)})
|
||||||
|
|
||||||
|
# xpath_results = '//div[contains(@class, "snippet fdb") and @data-type="web"]'
|
||||||
|
xpath_results = '//div[contains(@class, "snippet")]'
|
||||||
|
|
||||||
|
for result in eval_xpath_list(dom, xpath_results):
|
||||||
|
|
||||||
|
url = eval_xpath_getindex(result, './/a[@class="result-header"]/@href', 0, default=None)
|
||||||
|
title_tag = eval_xpath_getindex(result, './/span[@class="snippet-title"]', 0, default=None)
|
||||||
|
if not (url and title_tag):
|
||||||
|
continue
|
||||||
|
|
||||||
|
content_tag = eval_xpath_getindex(result, './/p[@class="snippet-description"]', 0, default='')
|
||||||
|
img_src = eval_xpath_getindex(result, './/img[@class="thumb"]/@src', 0, default='')
|
||||||
|
|
||||||
|
item = {
|
||||||
|
'url': url,
|
||||||
|
'title': extract_text(title_tag),
|
||||||
|
'content': extract_text(content_tag),
|
||||||
|
'img_src': img_src,
|
||||||
|
}
|
||||||
|
|
||||||
|
video_tag = eval_xpath_getindex(
|
||||||
|
result, './/div[contains(@class, "video-snippet") and @data-macro="video"]', 0, default=None
|
||||||
|
)
|
||||||
|
if video_tag is not None:
|
||||||
|
|
||||||
|
# In my tests a video tag in the WEB search was most often not a
|
||||||
|
# video, except the ones from YouTube.
|
||||||
|
|
||||||
|
iframe_src = _get_iframe_src(url)
|
||||||
|
if iframe_src:
|
||||||
|
item['iframe_src'] = iframe_src
|
||||||
|
item['template'] = 'videos.html'
|
||||||
|
item['thumbnail'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
|
||||||
|
else:
|
||||||
|
item['img_src'] = eval_xpath_getindex(video_tag, './/img/@src', 0, default='')
|
||||||
|
|
||||||
|
result_list.append(item)
|
||||||
|
|
||||||
|
return result_list
|
||||||
|
|
||||||
|
|
||||||
|
def _get_iframe_src(url):
|
||||||
|
parsed_url = urlparse(url)
|
||||||
|
if parsed_url.path == '/watch' and parsed_url.query:
|
||||||
|
video_id = parse_qs(parsed_url.query).get('v', []) # type: ignore
|
||||||
|
if video_id:
|
||||||
|
return 'https://www.youtube-nocookie.com/embed/' + video_id[0] # type: ignore
|
||||||
|
return None
|
||||||
|
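Usage sketch of the helper above (assuming the module is importable): URLs with a ``/watch`` path and a ``v`` query parameter (YouTube style) are rewritten to an embeddable no-cookie URL, everything else yields ``None``.

.. code:: python

   print(_get_iframe_src('https://www.youtube.com/watch?v=dQw4w9WgXcQ'))
   # https://www.youtube-nocookie.com/embed/dQw4w9WgXcQ
   print(_get_iframe_src('https://example.org/video/42'))
   # None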
|
||||||
|
|
||||||
|
def _parse_news(json_resp):
|
||||||
|
result_list = []
|
||||||
|
|
||||||
|
for result in json_resp["results"]:
|
||||||
|
item = {
|
||||||
|
'url': result['url'],
|
||||||
|
'title': result['title'],
|
||||||
|
'content': result['description'],
|
||||||
|
}
|
||||||
|
if result['thumbnail'] != "null":
|
||||||
|
item['img_src'] = result['thumbnail']['src']
|
||||||
|
result_list.append(item)
|
||||||
|
|
||||||
|
return result_list
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_images(json_resp):
|
||||||
|
result_list = []
|
||||||
|
|
||||||
|
for result in json_resp["results"]:
|
||||||
|
item = {
|
||||||
|
'url': result['url'],
|
||||||
|
'title': result['title'],
|
||||||
|
'content': result['description'],
|
||||||
|
'template': 'images.html',
|
||||||
|
'img_format': result['properties']['format'],
|
||||||
|
'source': result['source'],
|
||||||
|
'img_src': result['properties']['url'],
|
||||||
|
}
|
||||||
|
result_list.append(item)
|
||||||
|
|
||||||
|
return result_list
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_videos(json_resp):
|
||||||
|
result_list = []
|
||||||
|
|
||||||
|
for result in json_resp["results"]:
|
||||||
|
|
||||||
|
url = result['url']
|
||||||
|
item = {
|
||||||
|
'url': url,
|
||||||
|
'title': result['title'],
|
||||||
|
'content': result['description'],
|
||||||
|
'template': 'videos.html',
|
||||||
|
'length': result['video']['duration'],
|
||||||
|
'duration': result['video']['duration'],
|
||||||
|
}
|
||||||
|
|
||||||
|
if result['thumbnail'] != "null":
|
||||||
|
item['thumbnail'] = result['thumbnail']['src']
|
||||||
|
|
||||||
|
iframe_src = _get_iframe_src(url)
|
||||||
|
if iframe_src:
|
||||||
|
item['iframe_src'] = iframe_src
|
||||||
|
|
||||||
|
result_list.append(item)
|
||||||
|
|
||||||
|
return result_list
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch :ref:`languages <brave languages>` and :ref:`regions <brave
|
||||||
|
regions>` from Brave."""
|
||||||
|
|
||||||
|
# pylint: disable=import-outside-toplevel
|
||||||
|
|
||||||
|
import babel.languages
|
||||||
|
from searx.locales import region_tag, language_tag
|
||||||
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
|
|
||||||
|
engine_traits.custom["ui_lang"] = {}
|
||||||
|
|
||||||
|
headers = {
|
||||||
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
}
|
||||||
|
lang_map = {'no': 'nb'} # norway
|
||||||
|
|
||||||
|
# languages (UI)
|
||||||
|
|
||||||
|
resp = get('https://search.brave.com/settings', headers=headers)
|
||||||
|
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
print("ERROR: response from Brave is not OK.")
|
||||||
|
dom = html.fromstring(resp.text) # type: ignore
|
||||||
|
|
||||||
|
for option in dom.xpath('//div[@id="language-select"]//option'):
|
||||||
|
|
||||||
|
ui_lang = option.get('value')
|
||||||
|
try:
|
||||||
|
if '-' in ui_lang:
|
||||||
|
sxng_tag = region_tag(babel.Locale.parse(ui_lang, sep='-'))
|
||||||
|
else:
|
||||||
|
sxng_tag = language_tag(babel.Locale.parse(ui_lang))
|
||||||
|
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: can't determine babel locale of Brave's (UI) language %s" % ui_lang)
|
||||||
|
continue
|
||||||
|
|
||||||
|
conflict = engine_traits.custom["ui_lang"].get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != ui_lang:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, ui_lang))
|
||||||
|
continue
|
||||||
|
engine_traits.custom["ui_lang"][sxng_tag] = ui_lang
|
||||||
|
|
||||||
|
# search regions of brave
|
||||||
|
|
||||||
|
engine_traits.all_locale = 'all'
|
||||||
|
|
||||||
|
for country in dom.xpath('//div[@id="sidebar"]//ul/li/div[contains(@class, "country")]'):
|
||||||
|
|
||||||
|
flag = country.xpath('./span[contains(@class, "flag")]')[0]
|
||||||
|
# country_name = extract_text(flag.xpath('./following-sibling::*')[0])
|
||||||
|
country_tag = re.search(r'flag-([^\s]*)\s', flag.xpath('./@class')[0]).group(1) # type: ignore
|
||||||
|
|
||||||
|
# add official languages of the country ..
|
||||||
|
for lang_tag in babel.languages.get_official_languages(country_tag, de_facto=True):
|
||||||
|
lang_tag = lang_map.get(lang_tag, lang_tag)
|
||||||
|
sxng_tag = region_tag(babel.Locale.parse('%s_%s' % (lang_tag, country_tag.upper())))
|
||||||
|
# print("%-20s: %s <-- %s" % (country_name, country_tag, sxng_tag))
|
||||||
|
|
||||||
|
conflict = engine_traits.regions.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != country_tag:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, country_tag))
|
||||||
|
continue
|
||||||
|
engine_traits.regions[sxng_tag] = country_tag
|
||||||
Executable
+124
@@ -0,0 +1,124 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""BT4G_ (bt4g.com) is not a tracker and doesn't store any content and only
|
||||||
|
collects torrent metadata (such as file names and file sizes) and a magnet link
|
||||||
|
(torrent identifier).
|
||||||
|
|
||||||
|
This engine does not parse the HTML page because there is an API in XML (RSS).
|
||||||
|
The RSS feed provides less data, e.g. the number of seeders/leechers and the files
|
||||||
|
in the torrent are missing. It's a tradeoff for a "stable" engine, as the XML of the
|
||||||
|
RSS feed will change far less often than the HTML page.
|
||||||
|
|
||||||
|
.. _BT4G: https://bt4g.com/
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The engine has the following additional settings:
|
||||||
|
|
||||||
|
- :py:obj:`bt4g_order_by`
|
||||||
|
- :py:obj:`bt4g_category`
|
||||||
|
|
||||||
|
With these options a SearXNG maintainer is able to configure **additional**
|
||||||
|
engines for specific torrent searches. For example, an engine to search only for
|
||||||
|
Movies and sort the result list by the count of seeders.
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: bt4g.movie
|
||||||
|
engine: bt4g
|
||||||
|
shortcut: bt4gv
|
||||||
|
categories: video
|
||||||
|
bt4g_order_by: seeders
|
||||||
|
bt4g_category: 'movie'
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from searx.utils import get_torrent_size
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://bt4gprx.com',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'XML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['files']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
|
||||||
|
# search-url
|
||||||
|
url = 'https://bt4gprx.com'
|
||||||
|
search_url = url + '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss'
|
||||||
|
bt4g_order_by = 'relevance'
|
||||||
|
"""Result list can be ordered by ``relevance`` (default), ``size``, ``seeders``
|
||||||
|
or ``time``.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
When *time_range* is activated, the results are always ordered by ``time``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
bt4g_category = 'all'
|
||||||
|
"""BT$G offers categoies: ``all`` (default), ``audio``, ``movie``, ``doc``,
|
||||||
|
``app`` and ``other``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
order_by = bt4g_order_by
|
||||||
|
if params['time_range']:
|
||||||
|
order_by = 'time'
|
||||||
|
|
||||||
|
params['url'] = search_url.format(
|
||||||
|
search_term=quote(query),
|
||||||
|
order_by=order_by,
|
||||||
|
category=bt4g_category,
|
||||||
|
pageno=params['pageno'],
|
||||||
|
)
|
||||||
|
return params
|
||||||
|
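For illustration, the URL built above for the query ``ubuntu iso`` on page 1 with an active time range (which forces ``orderby=time``):

.. code:: python

   from urllib.parse import quote

   search_url = ('https://bt4gprx.com'
                 '/search?q={search_term}&orderby={order_by}&category={category}&p={pageno}&page=rss')
   print(search_url.format(search_term=quote('ubuntu iso'), order_by='time',
                           category='all', pageno=1))
   # https://bt4gprx.com/search?q=ubuntu%20iso&orderby=time&category=all&p=1&page=rss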
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
search_results = etree.XML(resp.content)
|
||||||
|
|
||||||
|
# return empty array if nothing is found
|
||||||
|
if len(search_results) == 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
for entry in search_results.xpath('./channel/item'):
|
||||||
|
title = entry.find("title").text
|
||||||
|
link = entry.find("guid").text
|
||||||
|
fullDescription = entry.find("description").text.split('<br>')
|
||||||
|
filesize = fullDescription[1]
|
||||||
|
filesizeParsed = re.split(r"([A-Z]+)", filesize)
|
||||||
|
magnetlink = entry.find("link").text
|
||||||
|
pubDate = entry.find("pubDate").text
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': link,
|
||||||
|
'title': title,
|
||||||
|
'magnetlink': magnetlink,
|
||||||
|
'seed': 'N/A',
|
||||||
|
'leech': 'N/A',
|
||||||
|
'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]),
|
||||||
|
'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'),
|
||||||
|
'template': 'torrent.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+89
@@ -0,0 +1,89 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
BTDigg (Videos, Music, Files)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
from urllib.parse import quote, urljoin
|
||||||
|
from searx.utils import extract_text, get_torrent_size
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://btdig.com',
|
||||||
|
"wikidata_id": 'Q4836698',
|
||||||
|
"official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'},
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['files']
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
# search-url
|
||||||
|
url = 'https://btdig.com'
|
||||||
|
search_url = url + '/search?q={search_term}&p={pageno}'
|
||||||
|
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'] - 1)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
search_res = dom.xpath('//div[@class="one_result"]')
|
||||||
|
|
||||||
|
# return empty array if nothing is found
|
||||||
|
if not search_res:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for result in search_res:
|
||||||
|
link = result.xpath('.//div[@class="torrent_name"]//a')[0]
|
||||||
|
href = urljoin(url, link.attrib.get('href'))
|
||||||
|
title = extract_text(link)
|
||||||
|
|
||||||
|
excerpt = result.xpath('.//div[@class="torrent_excerpt"]')[0]
|
||||||
|
content = html.tostring(excerpt, encoding='unicode', method='text', with_tail=False)
|
||||||
|
# it is better to emit <br/> instead of |, but html tags are verboten
|
||||||
|
content = content.strip().replace('\n', ' | ')
|
||||||
|
content = ' '.join(content.split())
|
||||||
|
|
||||||
|
filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0]
|
||||||
|
filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1]
|
||||||
|
files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]
|
||||||
|
|
||||||
|
# convert filesize to byte if possible
|
||||||
|
filesize = get_torrent_size(filesize, filesize_multiplier)
|
||||||
|
|
||||||
|
# convert files to int if possible
|
||||||
|
try:
|
||||||
|
files = int(files)
|
||||||
|
except ValueError:
|
||||||
|
files = None
|
||||||
|
|
||||||
|
magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href']
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': href,
|
||||||
|
'title': title,
|
||||||
|
'content': content,
|
||||||
|
'filesize': filesize,
|
||||||
|
'files': files,
|
||||||
|
'magnetlink': magnetlink,
|
||||||
|
'template': 'torrent.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
Executable
+243
@@ -0,0 +1,243 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""With *command engines* administrators can run engines to integrate arbitrary
|
||||||
|
shell commands.
|
||||||
|
|
||||||
|
.. attention::
|
||||||
|
|
||||||
|
When creating and enabling a ``command`` engine on a public instance, you
|
||||||
|
must be careful to avoid leaking private data.
|
||||||
|
|
||||||
|
The easiest solution is to limit the access by setting ``tokens`` as described
|
||||||
|
in section :ref:`private engines`. The engine base is flexible. Only your
|
||||||
|
imagination can limit the power of this engine (and maybe security concerns).
|
||||||
|
|
||||||
|
Configuration
|
||||||
|
=============
|
||||||
|
|
||||||
|
The following options are available:
|
||||||
|
|
||||||
|
``command``:
|
||||||
|
A comma separated list of the elements of the command. A special token
|
||||||
|
``{{QUERY}}`` tells where to put the search terms of the user. Example:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
['ls', '-l', '-h', '{{QUERY}}']
|
||||||
|
|
||||||
|
``delimiter``:
|
||||||
|
A mapping containing a delimiter ``chars`` and the *titles* of each element in
|
||||||
|
``keys``.
|
||||||
|
|
||||||
|
``parse_regex``:
|
||||||
|
A dict containing the regular expressions for each result key.
|
||||||
|
|
||||||
|
``query_type``:
|
||||||
|
|
||||||
|
The expected type of user search terms. Possible values: ``path`` and
|
||||||
|
``enum``.
|
||||||
|
|
||||||
|
``path``:
|
||||||
|
Checks if the user provided path is inside the working directory. If not,
|
||||||
|
the query is not executed.
|
||||||
|
|
||||||
|
``enum``:
|
||||||
|
Is a list of allowed search terms. If the user submits something which is
|
||||||
|
not included in the list, the query returns an error.
|
||||||
|
|
||||||
|
``query_enum``:
|
||||||
|
A list containing allowed search terms if ``query_type`` is set to ``enum``.
|
||||||
|
|
||||||
|
``working_dir``:
|
||||||
|
The directory where the command has to be executed. Default: ``./``.
|
||||||
|
|
||||||
|
``result_separator``:
|
||||||
|
The character that separates results. Default: ``\\n``.
|
||||||
|
|
||||||
|
Example
|
||||||
|
=======
|
||||||
|
|
||||||
|
The example engine below can be used to find files with a specific name in the
|
||||||
|
configured working directory:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: find
|
||||||
|
engine: command
|
||||||
|
command: ['find', '.', '-name', '{{QUERY}}']
|
||||||
|
query_type: path
|
||||||
|
shortcut: fnd
|
||||||
|
delimiter:
|
||||||
|
chars: ' '
|
||||||
|
keys: ['line']
|
||||||
|
|
||||||
|
Implementations
|
||||||
|
===============
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from os.path import expanduser, isabs, realpath, commonprefix
|
||||||
|
from shlex import split as shlex_split
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
from searx import logger
|
||||||
|
|
||||||
|
|
||||||
|
engine_type = 'offline'
|
||||||
|
paging = True
|
||||||
|
command = []
|
||||||
|
delimiter = {}
|
||||||
|
parse_regex = {}
|
||||||
|
query_type = ''
|
||||||
|
query_enum = []
|
||||||
|
environment_variables = {}
|
||||||
|
working_dir = realpath('.')
|
||||||
|
result_separator = '\n'
|
||||||
|
result_template = 'key-value.html'
|
||||||
|
timeout = 4.0
|
||||||
|
|
||||||
|
_command_logger = logger.getChild('command')
|
||||||
|
_compiled_parse_regex = {}
|
||||||
|
|
||||||
|
|
||||||
|
def init(engine_settings):
|
||||||
|
check_parsing_options(engine_settings)
|
||||||
|
|
||||||
|
if 'command' not in engine_settings:
|
||||||
|
raise ValueError('engine command : missing configuration key: command')
|
||||||
|
|
||||||
|
global command, working_dir, delimiter, parse_regex, environment_variables
|
||||||
|
|
||||||
|
command = engine_settings['command']
|
||||||
|
|
||||||
|
if 'working_dir' in engine_settings:
|
||||||
|
working_dir = engine_settings['working_dir']
|
||||||
|
if not isabs(engine_settings['working_dir']):
|
||||||
|
working_dir = realpath(working_dir)
|
||||||
|
|
||||||
|
if 'parse_regex' in engine_settings:
|
||||||
|
parse_regex = engine_settings['parse_regex']
|
||||||
|
for result_key, regex in parse_regex.items():
|
||||||
|
_compiled_parse_regex[result_key] = re.compile(regex, flags=re.MULTILINE)
|
||||||
|
if 'delimiter' in engine_settings:
|
||||||
|
delimiter = engine_settings['delimiter']
|
||||||
|
|
||||||
|
if 'environment_variables' in engine_settings:
|
||||||
|
environment_variables = engine_settings['environment_variables']
|
||||||
|
|
||||||
|
|
||||||
|
def search(query, params):
|
||||||
|
cmd = _get_command_to_run(query)
|
||||||
|
if not cmd:
|
||||||
|
return []
|
||||||
|
|
||||||
|
results = []
|
||||||
|
reader_thread = Thread(target=_get_results_from_process, args=(results, cmd, params['pageno']))
|
||||||
|
reader_thread.start()
|
||||||
|
reader_thread.join(timeout=timeout)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _get_command_to_run(query):
|
||||||
|
params = shlex_split(query)
|
||||||
|
__check_query_params(params)
|
||||||
|
|
||||||
|
cmd = []
|
||||||
|
for c in command:
|
||||||
|
if c == '{{QUERY}}':
|
||||||
|
cmd.extend(params)
|
||||||
|
else:
|
||||||
|
cmd.append(c)
|
||||||
|
|
||||||
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
def _get_results_from_process(results, cmd, pageno):
|
||||||
|
leftover = ''
|
||||||
|
count = 0
|
||||||
|
start, end = __get_results_limits(pageno)
|
||||||
|
with Popen(cmd, stdout=PIPE, stderr=PIPE, env=environment_variables) as process:
|
||||||
|
line = process.stdout.readline()
|
||||||
|
while line:
|
||||||
|
buf = leftover + line.decode('utf-8')
|
||||||
|
raw_results = buf.split(result_separator)
|
||||||
|
if raw_results[-1]:
|
||||||
|
leftover = raw_results[-1]
|
||||||
|
raw_results = raw_results[:-1]
|
||||||
|
|
||||||
|
for raw_result in raw_results:
|
||||||
|
result = __parse_single_result(raw_result)
|
||||||
|
if result is None:
|
||||||
|
_command_logger.debug('skipped result: %s', raw_result)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if start <= count <= end:
|
||||||
|
result['template'] = result_template
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
count += 1
|
||||||
|
if end < count:
|
||||||
|
return results
|
||||||
|
|
||||||
|
line = process.stdout.readline()
|
||||||
|
|
||||||
|
return_code = process.wait(timeout=timeout)
|
||||||
|
if return_code != 0:
|
||||||
|
raise RuntimeError('non-zero return code when running command', cmd, return_code)
|
||||||
|
|
||||||
|
|
||||||
|
def __get_results_limits(pageno):
|
||||||
|
start = (pageno - 1) * 10
|
||||||
|
end = start + 9
|
||||||
|
return start, end
|
||||||
|
|
||||||
|
|
||||||
|
def __check_query_params(params):
|
||||||
|
if not query_type:
|
||||||
|
return
|
||||||
|
|
||||||
|
if query_type == 'path':
|
||||||
|
query_path = params[-1]
|
||||||
|
query_path = expanduser(query_path)
|
||||||
|
if commonprefix([realpath(query_path), working_dir]) != working_dir:
|
||||||
|
raise ValueError('requested path is outside of configured working directory')
|
||||||
|
elif query_type == 'enum' and len(query_enum) > 0:
|
||||||
|
for param in params:
|
||||||
|
if param not in query_enum:
|
||||||
|
raise ValueError('submitted query params is not allowed', param, 'allowed params:', query_enum)
|
||||||
|
|
||||||
|
|
||||||
|
def check_parsing_options(engine_settings):
|
||||||
|
"""Checks if delimiter based parsing or regex parsing is configured correctly"""
|
||||||
|
|
||||||
|
if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings:
|
||||||
|
raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex')
|
||||||
|
if 'delimiter' in engine_settings and 'parse_regex' in engine_settings:
|
||||||
|
raise ValueError('failed to init settings for parsing lines: too many settings')
|
||||||
|
|
||||||
|
if 'delimiter' in engine_settings:
|
||||||
|
if 'chars' not in engine_settings['delimiter'] or 'keys' not in engine_settings['delimiter']:
|
||||||
|
raise ValueError
|
||||||
|
|
||||||
|
|
||||||
|
def __parse_single_result(raw_result):
|
||||||
|
"""Parses command line output based on configuration"""
|
||||||
|
|
||||||
|
result = {}
|
||||||
|
|
||||||
|
if delimiter:
|
||||||
|
elements = raw_result.split(delimiter['chars'], maxsplit=len(delimiter['keys']) - 1)
|
||||||
|
if len(elements) != len(delimiter['keys']):
|
||||||
|
return {}
|
||||||
|
for i in range(len(elements)):
|
||||||
|
result[delimiter['keys'][i]] = elements[i]
|
||||||
|
|
||||||
|
if parse_regex:
|
||||||
|
for result_key, regex in _compiled_parse_regex.items():
|
||||||
|
found = regex.search(raw_result)
|
||||||
|
if not found:
|
||||||
|
return {}
|
||||||
|
result[result_key] = raw_result[found.start() : found.end()]
|
||||||
|
|
||||||
|
return result
|
||||||
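A quick sketch of how the delimiter based parsing above behaves, assuming the ``find`` example configuration from the module docstring (``delimiter: {chars: ' ', keys: ['line']}``):

.. code:: python

   # Illustrative: every raw output line becomes one result dict that is
   # rendered with the key-value template.
   delimiter = {'chars': ' ', 'keys': ['line']}
   raw_result = './searx/engines/bing.py'
   elements = raw_result.split(delimiter['chars'], maxsplit=len(delimiter['keys']) - 1)
   print(dict(zip(delimiter['keys'], elements)))   # {'line': './searx/engines/bing.py'}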
Executable
+116
@@ -0,0 +1,116 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CORE (science)

"""

from datetime import datetime
from urllib.parse import urlencode

from searx.exceptions import SearxEngineAPIException

about = {
    "website": 'https://core.ac.uk',
    "wikidata_id": 'Q22661180',
    "official_api_documentation": 'https://core.ac.uk/documentation/api/',
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
nb_per_page = 10

api_key = 'unset'

base_url = 'https://core.ac.uk:443/api-v2/search/'
search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'


def request(query, params):

    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    search_path = search_string.format(
        query=urlencode({'q': query}),
        nb_per_page=nb_per_page,
        page=params['pageno'],
        apikey=api_key,
    )
    params['url'] = base_url + search_path

    return params


def response(resp):
    results = []
    json_data = resp.json()

    for result in json_data['data']:
        source = result['_source']
        url = None
        if source.get('urls'):
            url = source['urls'][0].replace('http://', 'https://', 1)

        if url is None and source.get('doi'):
            # use the DOI reference
            url = 'https://doi.org/' + source['doi']

        if url is None and source.get('downloadUrl'):
            # use the downloadUrl
            url = source['downloadUrl']

        if url is None and source.get('identifiers'):
            # try to find an ark id, see
            # https://www.wikidata.org/wiki/Property:P8091
            # and https://en.wikipedia.org/wiki/Archival_Resource_Key
            arkids = [
                identifier[5:]  # 5 is the length of "ark:/"
                for identifier in source.get('identifiers')
                if isinstance(identifier, str) and identifier.startswith('ark:/')
            ]
            if len(arkids) > 0:
                url = 'https://n2t.net/' + arkids[0]

        if url is None:
            continue

        publishedDate = None
        time = source['publishedDate'] or source['depositedDate']
        if time:
            publishedDate = datetime.fromtimestamp(time / 1000)

        # sometimes the 'title' is None / filter None values
        journals = [j['title'] for j in (source.get('journals') or []) if j['title']]

        publisher = source['publisher']
        if publisher:
            publisher = source['publisher'].strip("'")

        results.append(
            {
                'template': 'paper.html',
                'title': source['title'],
                'url': url,
                'content': source['description'] or '',
                # 'comments': '',
                'tags': source['topics'],
                'publishedDate': publishedDate,
                'type': (source['types'] or [None])[0],
                'authors': source['authors'],
                'editor': ', '.join(source['contributors'] or []),
                'publisher': publisher,
                'journal': ', '.join(journals),
                # 'volume': '',
                # 'pages' : '',
                # 'number': '',
                'doi': source['doi'],
                'issn': [x for x in [source.get('issn')] if x],
                'isbn': [x for x in [source.get('isbn')] if x],  # exists in the rawRecordXml
                'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'),
            }
        )

    return results
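The identifiers fallback above looks for an Archival Resource Key and builds an n2t.net resolver URL from it. A small self-contained sketch of that step, using made-up identifier strings:

# sketch: pick the first ARK identifier from a mixed identifiers list
identifiers = ['oai:core.ac.uk:12345', 'ark:/13030/abc123']  # assumed sample data

arkids = [i[len('ark:/'):] for i in identifiers if isinstance(i, str) and i.startswith('ark:/')]
url = 'https://n2t.net/' + arkids[0] if arkids else None
print(url)  # -> https://n2t.net/13030/abc123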
Executable
+60
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CrossRef (Science)
"""
# pylint: disable=use-dict-literal

from urllib.parse import urlencode
from searx.utils import html_to_text

about = {
    "website": 'https://www.crossref.org/',
    "wikidata_id": 'Q5188229',
    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://api.crossref.org/works'


def request(query, params):
    params['url'] = search_url + '?' + urlencode(dict(query=query, offset=20 * (params['pageno'] - 1)))
    return params


def response(resp):
    res = resp.json()
    results = []
    for record in res['message']['items']:
        record_type = record['type']
        if record_type == 'book-chapter':
            title = record['container-title'][0]
            if record['title'][0].lower().strip() != title.lower().strip():
                title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')'
            journal = None
        else:
            title = html_to_text(record['title'][0])
            journal = record.get('container-title', [None])[0]
        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': title,
                'journal': journal,
                'volume': record.get('volume'),
                'type': record['type'],
                'content': html_to_text(record.get('abstract', '')),
                'publisher': record.get('publisher'),
                'authors': authors,
                'doi': record['DOI'],
                'isbn': isbn,
            }
        )
    return results
Executable
+56
@@ -0,0 +1,56 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Currency convert (DuckDuckGo)
"""

import json

# about
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSONP',
    "description": "Service from DuckDuckGo.",
}

engine_type = 'online_currency'
categories = []
base_url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
weight = 100

https_support = True


def request(_query, params):
    params['url'] = base_url.format(params['from'], params['to'])
    return params


def response(resp):
    """remove first and last lines to get only json"""
    json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
    results = []
    try:
        conversion_rate = float(json.loads(json_resp)['conversion']['converted-amount'])
    except ValueError:
        return results
    answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
        resp.search_params['amount'],
        resp.search_params['from'],
        resp.search_params['amount'] * conversion_rate,
        resp.search_params['to'],
        conversion_rate,
        resp.search_params['from_name'],
        resp.search_params['to_name'],
    )

    url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'.format(
        resp.search_params['from'].upper(), resp.search_params['to']
    )

    results.append({'answer': answer, 'url': url})

    return results
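The response() above strips the JSONP wrapper by dropping everything up to the first newline and after the last one. A rough standalone illustration of the same trimming, with an invented wrapper string (the real spice payload may differ slightly, which is why the engine subtracts 2 at the end):

import json

# assumed example of a JSONP-wrapped payload, not the exact DuckDuckGo format
jsonp = 'ddg_spice_currency(\n{"conversion": {"converted-amount": "0.92"}}\n);'

# same idea as the engine: cut away the wrapper's first and last lines
payload = jsonp[jsonp.find('\n') + 1 : jsonp.rfind('\n')]
rate = float(json.loads(payload)['conversion']['converted-amount'])
print(rate)  # -> 0.92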
Executable
+252
@@ -0,0 +1,252 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
Dailymotion (Videos)
|
||||||
|
~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
.. _REST GET: https://developers.dailymotion.com/tools/
|
||||||
|
.. _Global API Parameters: https://developers.dailymotion.com/api/#global-parameters
|
||||||
|
.. _Video filters API: https://developers.dailymotion.com/api/#video-filters
|
||||||
|
.. _Fields selection: https://developers.dailymotion.com/api/#fields-selection
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import time
|
||||||
|
import babel
|
||||||
|
|
||||||
|
from searx.network import get, raise_for_httperror # see https://github.com/searxng/searxng/issues/762
|
||||||
|
from searx.utils import html_to_text
|
||||||
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
from searx.locales import region_tag, language_tag
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.dailymotion.com',
|
||||||
|
"wikidata_id": 'Q769222',
|
||||||
|
"official_api_documentation": 'https://www.dailymotion.com/developer',
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['videos']
|
||||||
|
paging = True
|
||||||
|
number_of_results = 10
|
||||||
|
|
||||||
|
time_range_support = True
|
||||||
|
time_delta_dict = {
|
||||||
|
"day": timedelta(days=1),
|
||||||
|
"week": timedelta(days=7),
|
||||||
|
"month": timedelta(days=31),
|
||||||
|
"year": timedelta(days=365),
|
||||||
|
}
|
||||||
|
|
||||||
|
safesearch = True
|
||||||
|
safesearch_params = {
|
||||||
|
2: {'is_created_for_kids': 'true'},
|
||||||
|
1: {'is_created_for_kids': 'true'},
|
||||||
|
0: {},
|
||||||
|
}
|
||||||
|
"""True if this video is "Created for Kids" / intends to target an audience
|
||||||
|
under the age of 16 (``is_created_for_kids`` in `Video filters API`_ )
|
||||||
|
"""
|
||||||
|
|
||||||
|
family_filter_map = {
|
||||||
|
2: 'true',
|
||||||
|
1: 'true',
|
||||||
|
0: 'false',
|
||||||
|
}
|
||||||
|
"""By default, the family filter is turned on. Setting this parameter to
|
||||||
|
``false`` will stop filtering-out explicit content from searches and global
|
||||||
|
contexts (``family_filter`` in `Global API Parameters`_ ).
|
||||||
|
"""
|
||||||
|
|
||||||
|
result_fields = [
|
||||||
|
'allow_embed',
|
||||||
|
'description',
|
||||||
|
'title',
|
||||||
|
'created_time',
|
||||||
|
'duration',
|
||||||
|
'url',
|
||||||
|
'thumbnail_360_url',
|
||||||
|
'id',
|
||||||
|
]
|
||||||
|
"""`Fields selection`_, by default, a few fields are returned. To request more
|
||||||
|
specific fields, the ``fields`` parameter is used with the list of fields
|
||||||
|
SearXNG needs in the response to build a video result list.
|
||||||
|
"""
|
||||||
|
|
||||||
|
search_url = 'https://api.dailymotion.com/videos?'
|
||||||
|
"""URL to retrieve a list of videos.
|
||||||
|
|
||||||
|
- `REST GET`_
|
||||||
|
- `Global API Parameters`_
|
||||||
|
- `Video filters API`_
|
||||||
|
"""
|
||||||
|
|
||||||
|
iframe_src = "https://www.dailymotion.com/embed/video/{video_id}"
|
||||||
|
"""URL template to embed video in SearXNG's result list."""
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
if not query:
|
||||||
|
return False
|
||||||
|
|
||||||
|
eng_region: str = traits.get_region(params['searxng_locale'], 'en_US') # type: ignore
|
||||||
|
eng_lang = traits.get_language(params['searxng_locale'], 'en')
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'search': query,
|
||||||
|
'family_filter': family_filter_map.get(params['safesearch'], 'false'),
|
||||||
|
'thumbnail_ratio': 'original', # original|widescreen|square
|
||||||
|
# https://developers.dailymotion.com/api/#video-filters
|
||||||
|
'languages': eng_lang,
|
||||||
|
'page': params['pageno'],
|
||||||
|
'password_protected': 'false',
|
||||||
|
'private': 'false',
|
||||||
|
'sort': 'relevance',
|
||||||
|
'limit': number_of_results,
|
||||||
|
'fields': ','.join(result_fields),
|
||||||
|
}
|
||||||
|
|
||||||
|
args.update(safesearch_params.get(params['safesearch'], {}))
|
||||||
|
|
||||||
|
# Don't add localization and country arguments if the user selected only a
# language (:de, :en, ..) without a region
|
||||||
|
|
||||||
|
if len(params['searxng_locale'].split('-')) > 1:
|
||||||
|
# https://developers.dailymotion.com/api/#global-parameters
|
||||||
|
args['localization'] = eng_region
|
||||||
|
args['country'] = eng_region.split('_')[1]
|
||||||
|
# Insufficient rights for the `ams_country' parameter of route `GET /videos'
|
||||||
|
# 'ams_country': eng_region.split('_')[1],
|
||||||
|
|
||||||
|
time_delta = time_delta_dict.get(params["time_range"])
|
||||||
|
if time_delta:
|
||||||
|
created_after = datetime.now() - time_delta
|
||||||
|
args['created_after'] = datetime.timestamp(created_after)
|
||||||
|
|
||||||
|
query_str = urlencode(args)
|
||||||
|
params['url'] = search_url + query_str
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
search_res = resp.json()
|
||||||
|
|
||||||
|
# check for an API error
|
||||||
|
if 'error' in search_res:
|
||||||
|
raise SearxEngineAPIException(search_res['error'].get('message'))
|
||||||
|
|
||||||
|
raise_for_httperror(resp)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for res in search_res.get('list', []):
|
||||||
|
|
||||||
|
title = res['title']
|
||||||
|
url = res['url']
|
||||||
|
|
||||||
|
content = html_to_text(res['description'])
|
||||||
|
if len(content) > 300:
|
||||||
|
content = content[:300] + '...'
|
||||||
|
|
||||||
|
publishedDate = datetime.fromtimestamp(res['created_time'], None)
|
||||||
|
|
||||||
|
length = time.gmtime(res.get('duration'))
|
||||||
|
if length.tm_hour:
|
||||||
|
length = time.strftime("%H:%M:%S", length)
|
||||||
|
else:
|
||||||
|
length = time.strftime("%M:%S", length)
|
||||||
|
|
||||||
|
thumbnail = res['thumbnail_360_url']
|
||||||
|
thumbnail = thumbnail.replace("http://", "https://")
|
||||||
|
|
||||||
|
item = {
|
||||||
|
'template': 'videos.html',
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'content': content,
|
||||||
|
'publishedDate': publishedDate,
|
||||||
|
'length': length,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
}
|
||||||
|
|
||||||
|
# HINT: no matter what the value is, without an API token videos can't be
# shown embedded
|
||||||
|
if res['allow_embed']:
|
||||||
|
item['iframe_src'] = iframe_src.format(video_id=res['id'])
|
||||||
|
|
||||||
|
results.append(item)
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch locales & languages from dailymotion.
|
||||||
|
|
||||||
|
Locales fetched from `api/locales <https://api.dailymotion.com/locales>`_.
|
||||||
|
There are duplications in the locale codes returned from Dailymotion which
|
||||||
|
can be ignored::
|
||||||
|
|
||||||
|
en_EN --> en_GB, en_US
|
||||||
|
ar_AA --> ar_EG, ar_AE, ar_SA
|
||||||
|
|
||||||
|
The language list `api/languages <https://api.dailymotion.com/languages>`_
|
||||||
|
contains over 7000 *languages* codes (see PR1071_). We use only those
|
||||||
|
language codes that are used in the locales.
|
||||||
|
|
||||||
|
.. _PR1071: https://github.com/searxng/searxng/pull/1071
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
resp = get('https://api.dailymotion.com/locales')
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
print("ERROR: response from dailymotion/locales is not OK.")
|
||||||
|
|
||||||
|
for item in resp.json()['list']: # type: ignore
|
||||||
|
eng_tag = item['locale']
|
||||||
|
if eng_tag in ('en_EN', 'ar_AA'):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
sxng_tag = region_tag(babel.Locale.parse(eng_tag))
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: item unknown --> %s" % item)
|
||||||
|
continue
|
||||||
|
|
||||||
|
conflict = engine_traits.regions.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_tag:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
|
||||||
|
continue
|
||||||
|
engine_traits.regions[sxng_tag] = eng_tag
|
||||||
|
|
||||||
|
locale_lang_list = [x.split('_')[0] for x in engine_traits.regions.values()]
|
||||||
|
|
||||||
|
resp = get('https://api.dailymotion.com/languages')
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
print("ERROR: response from dailymotion/languages is not OK.")
|
||||||
|
|
||||||
|
for item in resp.json()['list']: # type: ignore
|
||||||
|
eng_tag = item['code']
|
||||||
|
if eng_tag in locale_lang_list:
|
||||||
|
sxng_tag = language_tag(babel.Locale.parse(eng_tag))
|
||||||
|
engine_traits.languages[sxng_tag] = eng_tag
|
||||||
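request() above maps SearXNG's time_range onto Dailymotion's created_after filter by subtracting a timedelta from the current time. A minimal sketch of just that conversion, outside the engine:

from datetime import datetime, timedelta

time_delta_dict = {
    "day": timedelta(days=1),
    "week": timedelta(days=7),
    "month": timedelta(days=31),
    "year": timedelta(days=365),
}

def created_after(time_range):
    """Return the 'created_after' UNIX timestamp for a time range, or None."""
    delta = time_delta_dict.get(time_range)
    if not delta:
        return None
    return datetime.timestamp(datetime.now() - delta)

print(created_after('week'))  # value depends on the current time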
Executable
+62
@@ -0,0 +1,62 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Deepl translation engine"""

from json import loads

about = {
    "website": 'https://deepl.com',
    "wikidata_id": 'Q43968444',
    "official_api_documentation": 'https://www.deepl.com/docs-api',
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
}

engine_type = 'online_dictionary'
categories = ['general']

url = 'https://api-free.deepl.com/v2/translate'
api_key = None


def request(_query, params):
    '''pre-request callback

    params<dict>:

    - ``method`` : POST/GET
    - ``headers``: {}
    - ``data``: {} # if method == POST
    - ``url``: ''
    - ``category``: 'search category'
    - ``pageno``: 1 # number of the requested page
    '''

    params['url'] = url
    params['method'] = 'POST'
    params['data'] = {'auth_key': api_key, 'text': params['query'], 'target_lang': params['to_lang'][1]}

    return params


def response(resp):
    results = []
    result = loads(resp.text)
    translations = result['translations']

    infobox = "<dl>"

    for translation in translations:
        infobox += f"<dd>{translation['text']}</dd>"

    infobox += "</dl>"

    results.append(
        {
            'infobox': 'Deepl',
            'content': infobox,
        }
    )

    return results
Executable
+60
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Deezer (Music)
"""

from json import loads
from urllib.parse import urlencode

# about
about = {
    "website": 'https://deezer.com',
    "wikidata_id": 'Q602243',
    "official_api_documentation": 'https://developers.deezer.com/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ['music']
paging = True

# search-url
url = 'https://api.deezer.com/'
search_url = url + 'search?{query}&index={offset}'
iframe_src = "https://www.deezer.com/plugins/player?type=tracks&id={audioid}"


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * 25

    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)

    return params


# get response from search-request
def response(resp):
    results = []

    search_res = loads(resp.text)

    # parse results
    for result in search_res.get('data', []):
        if result['type'] == 'track':
            title = result['title']
            url = result['link']

            if url.startswith('http://'):
                url = 'https' + url[4:]

            content = '{} - {} - {}'.format(result['artist']['name'], result['album']['title'], result['title'])

            # append result
            results.append(
                {'url': url, 'title': title, 'iframe_src': iframe_src.format(audioid=result['id']), 'content': content}
            )

    # return results
    return results
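Deezer pages its search API in steps of 25, so the engine turns the page number into an index offset before formatting the URL. A standalone sketch of that URL assembly (query and page number are arbitrary examples):

from urllib.parse import urlencode

url = 'https://api.deezer.com/'
search_url = url + 'search?{query}&index={offset}'

def build_url(query, pageno):
    offset = (pageno - 1) * 25  # Deezer pages in steps of 25 results
    return search_url.format(query=urlencode({'q': query}), offset=offset)

print(build_url('daft punk', 2))
# -> https://api.deezer.com/search?q=daft+punk&index=25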
Executable
+73
@@ -0,0 +1,73 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Within this module we implement a *demo offline engine*.  Do not look too
closely at the implementation, it is just a simple example.  To make use of
this *demo* engine, add the following entry to your engines list in ``settings.yml``:

.. code:: yaml

  - name: my offline engine
    engine: demo_offline
    shortcut: demo
    disabled: false

"""

import json

engine_type = 'offline'
categories = ['general']
disabled = True
timeout = 2.0

about = {
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

# if there is a need for globals, use a leading underline
_my_offline_engine = None


def init(engine_settings=None):
    """Initialization of the (offline) engine.  The origin of this demo engine is
    a simple JSON string which is loaded in this example while the engine is
    initialized.

    """
    global _my_offline_engine  # pylint: disable=global-statement

    _my_offline_engine = (
        '[ {"value": "%s"}'
        ', {"value":"first item"}'
        ', {"value":"second item"}'
        ', {"value":"third item"}'
        ']' % engine_settings.get('name')
    )


def search(query, request_params):
    """Query the (offline) engine and return results.  Assemble the list of
    results from your local engine.  In this demo engine we ignore the 'query'
    term; usually you would pass the 'query' term to your local engine to
    filter out the results.

    """
    ret_val = []

    result_list = json.loads(_my_offline_engine)

    for row in result_list:
        entry = {
            'query': query,
            'language': request_params['searxng_locale'],
            'value': row.get("value"),
            # choose a result template or comment out to use the *default*
            'template': 'key-value.html',
        }
        ret_val.append(entry)

    return ret_val
Executable
+100
@@ -0,0 +1,100 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
"""Within this module we implement a *demo online engine*.  Do not look too
closely at the implementation, it is just a simple example which queries `The
Art Institute of Chicago <https://www.artic.edu>`_.

To make use of this *demo* engine, add the following entry to your engines
list in ``settings.yml``:
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: my online engine
|
||||||
|
engine: demo_online
|
||||||
|
shortcut: demo
|
||||||
|
disabled: false
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
engine_type = 'online'
|
||||||
|
send_accept_language_header = True
|
||||||
|
categories = ['general']
|
||||||
|
disabled = True
|
||||||
|
timeout = 2.0
|
||||||
|
categories = ['images']
|
||||||
|
paging = True
|
||||||
|
page_size = 20
|
||||||
|
|
||||||
|
search_api = 'https://api.artic.edu/api/v1/artworks/search?'
|
||||||
|
image_api = 'https://www.artic.edu/iiif/2/'
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.artic.edu',
|
||||||
|
"wikidata_id": 'Q239303',
|
||||||
|
"official_api_documentation": 'http://api.artic.edu/docs/',
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# if there is a need for globals, use a leading underline
|
||||||
|
_my_online_engine = None
|
||||||
|
|
||||||
|
|
||||||
|
def init(engine_settings):
|
||||||
|
"""Initialization of the (online) engine. If no initialization is needed, drop
|
||||||
|
this init function.
|
||||||
|
|
||||||
|
"""
|
||||||
|
global _my_online_engine # pylint: disable=global-statement
|
||||||
|
_my_online_engine = engine_settings.get('name')
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Build up the ``params`` for the online request. In this example we build a
|
||||||
|
URL to fetch images from `artic.edu <https://artic.edu>`__
|
||||||
|
|
||||||
|
"""
|
||||||
|
args = urlencode(
|
||||||
|
{
|
||||||
|
'q': query,
|
||||||
|
'page': params['pageno'],
|
||||||
|
'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles',
|
||||||
|
'limit': page_size,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
params['url'] = search_api + args
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Parse out the result items from the response. In this example we parse the
|
||||||
|
response from `api.artic.edu <https://artic.edu>`__ and filter out all
|
||||||
|
images.
|
||||||
|
|
||||||
|
"""
|
||||||
|
results = []
|
||||||
|
json_data = loads(resp.text)
|
||||||
|
|
||||||
|
for result in json_data['data']:
|
||||||
|
|
||||||
|
if not result['image_id']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': 'https://artic.edu/artworks/%(id)s' % result,
|
||||||
|
'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result,
|
||||||
|
'content': result['medium_display'],
|
||||||
|
'author': ', '.join(result['artist_titles']),
|
||||||
|
'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result,
|
||||||
|
'img_format': result['dimensions'],
|
||||||
|
'template': 'images.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+81
@@ -0,0 +1,81 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
 Deviantart (Images)
"""

from urllib.parse import urlencode
from lxml import html

# about
about = {
    "website": 'https://www.deviantart.com/',
    "wikidata_id": 'Q46523',
    "official_api_documentation": 'https://www.deviantart.com/developers/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['images']
paging = True
time_range_support = True

time_range_dict = {
    'day': 'popular-24-hours',
    'week': 'popular-1-week',
    'month': 'popular-1-month',
    'year': 'most-recent',
}

# search-url
base_url = 'https://www.deviantart.com'


def request(query, params):

    # https://www.deviantart.com/search/deviations?page=5&q=foo

    query = {
        'page': params['pageno'],
        'q': query,
    }
    if params['time_range'] in time_range_dict:
        query['order'] = time_range_dict[params['time_range']]

    params['url'] = base_url + '/search/deviations?' + urlencode(query)

    return params


def response(resp):

    results = []

    dom = html.fromstring(resp.text)

    for row in dom.xpath('//div[contains(@data-hook, "content_row")]'):
        for result in row.xpath('./div'):

            a_tag = result.xpath('.//a[@data-hook="deviation_link"]')[0]
            noscript_tag = a_tag.xpath('.//noscript')

            if noscript_tag:
                img_tag = noscript_tag[0].xpath('.//img')
            else:
                img_tag = a_tag.xpath('.//img')
            if not img_tag:
                continue
            img_tag = img_tag[0]

            results.append(
                {
                    'template': 'images.html',
                    'url': a_tag.attrib.get('href'),
                    'img_src': img_tag.attrib.get('src'),
                    'title': img_tag.attrib.get('alt'),
                }
            )

    return results
Executable
+60
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Dictzone
"""

from urllib.parse import urljoin
from lxml import html
from searx.utils import eval_xpath

# about
about = {
    "website": 'https://dictzone.com/',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

engine_type = 'online_dictionary'
categories = ['general']
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

results_xpath = './/table[@id="r"]/tr'
https_support = True


def request(query, params):
    params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query'])

    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
        try:
            from_result, to_results_raw = eval_xpath(result, './td')
        except:
            continue

        to_results = []
        for to_result in eval_xpath(to_results_raw, './p/a'):
            t = to_result.text_content()
            if t.strip():
                to_results.append(to_result.text_content())

        results.append(
            {
                'url': urljoin(str(resp.url), '?%d' % k),
                'title': from_result.text_content(),
                'content': '; '.join(to_results),
            }
        )

    return results
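Dictzone has no query API; the engine simply formats the dictionary page URL from the two languages and the query. A sketch of that formatting, where the spelled-out language names are assumptions standing in for ``params['from_lang'][2]`` / ``params['to_lang'][2]``:

url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'

# assumed values; in the engine these come from the online_dictionary params
print(url.format(from_lang='english', to_lang='german', query='cat'))
# -> https://dictzone.com/english-german-dictionary/cat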
Executable
+64
@@ -0,0 +1,64 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 DigBT (Videos, Music, Files)
"""

from urllib.parse import urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size

# about
about = {
    "website": 'https://digbt.org',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

categories = ['videos', 'music', 'files']
paging = True

URL = 'https://digbt.org'
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
FILESIZE = 3
FILESIZE_MULTIPLIER = 4


def request(query, params):
    params['url'] = SEARCH_URL.format(query=query, pageno=params['pageno'])

    return params


def response(resp):
    dom = html.fromstring(resp.text)
    search_res = dom.xpath('.//td[@class="x-item"]')

    if not search_res:
        return list()

    results = list()
    for result in search_res:
        url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])
        title = extract_text(result.xpath('.//a[@title]'))
        content = extract_text(result.xpath('.//div[@class="files"]'))
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
        magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'filesize': filesize,
                'magnetlink': magnetlink,
                'seed': 'N/A',
                'leech': 'N/A',
                'template': 'torrent.html',
            }
        )

    return results
Executable
+63
@@ -0,0 +1,63 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Docker Hub (IT)

"""
# pylint: disable=use-dict-literal

from json import loads
from urllib.parse import urlencode
from dateutil import parser

about = {
    "website": 'https://hub.docker.com',
    "wikidata_id": 'Q100769064',
    "official_api_documentation": 'https://docs.docker.com/registry/spec/api/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['it']  # optional
paging = True

base_url = "https://hub.docker.com/"
search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25"


def request(query, params):

    params['url'] = search_url.format(query=urlencode(dict(q=query, page=params["pageno"])))
    params["headers"]["Search-Version"] = "v3"

    return params


def response(resp):
    '''post-response callback
    resp: requests response object
    '''
    results = []
    body = loads(resp.text)

    # Make sure `summaries` isn't `null`
    search_res = body.get("summaries")
    if search_res:
        for item in search_res:
            result = {}

            # Make sure correct URL is set
            filter_type = item.get("filter_type")
            is_official = filter_type in ["store", "official"]

            if is_official:
                result["url"] = base_url + "_/" + item.get('slug', "")
            else:
                result["url"] = base_url + "r/" + item.get('slug', "")
            result["title"] = item.get("name")
            result["content"] = item.get("short_description")
            result["publishedDate"] = parser.parse(item.get("updated_at") or item.get("created_at"))
            result["thumbnail"] = item["logo_url"].get("large") or item["logo_url"].get("small")
            results.append(result)

    return results
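The response() above routes official images to /_/<slug> and everything else to /r/<slug>. A small sketch of that branch with made-up items:

base_url = "https://hub.docker.com/"

def image_url(item):
    # official images live under /_/<slug>, community images under /r/<slug>
    if item.get("filter_type") in ["store", "official"]:
        return base_url + "_/" + item.get("slug", "")
    return base_url + "r/" + item.get("slug", "")

# assumed sample items, not real API responses
print(image_url({"filter_type": "official", "slug": "nginx"}))      # .../_/nginx
print(image_url({"filter_type": "community", "slug": "foo/bar"}))   # .../r/foo/bar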
Executable
+86
@@ -0,0 +1,86 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Doku Wiki
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml.html import fromstring
|
||||||
|
from searx.utils import extract_text, eval_xpath
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.dokuwiki.org/',
|
||||||
|
"wikidata_id": 'Q851864',
|
||||||
|
"official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
|
||||||
|
paging = False
|
||||||
|
number_of_results = 5
|
||||||
|
|
||||||
|
# search-url
|
||||||
|
# Doku is OpenSearch compatible
|
||||||
|
base_url = 'http://localhost:8090'
|
||||||
|
search_url = (
|
||||||
|
# fmt: off
|
||||||
|
'/?do=search'
|
||||||
|
'&{query}'
|
||||||
|
# fmt: on
|
||||||
|
)
|
||||||
|
# TODO '&startRecord={offset}'
|
||||||
|
# TODO '&maximumRecords={limit}'
|
||||||
|
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
params['url'] = base_url + search_url.format(query=urlencode({'id': query}))
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
doc = fromstring(resp.text)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
# Quickhits
|
||||||
|
for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
|
||||||
|
try:
|
||||||
|
res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not res_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append({'title': title, 'content': "", 'url': base_url + res_url})
|
||||||
|
|
||||||
|
# Search results
|
||||||
|
for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
|
||||||
|
try:
|
||||||
|
if r.tag == "dt":
|
||||||
|
res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
|
||||||
|
title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
|
||||||
|
elif r.tag == "dd":
|
||||||
|
content = extract_text(eval_xpath(r, '.'))
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append({'title': title, 'content': content, 'url': base_url + res_url})
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not res_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
Executable
+437
@@ -0,0 +1,437 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
DuckDuckGo Lite
|
||||||
|
~~~~~~~~~~~~~~~
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
import re
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import json
|
||||||
|
import babel
|
||||||
|
import lxml.html
|
||||||
|
|
||||||
|
from searx import (
|
||||||
|
locales,
|
||||||
|
redislib,
|
||||||
|
external_bang,
|
||||||
|
)
|
||||||
|
from searx.utils import (
|
||||||
|
eval_xpath,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
extract_text,
|
||||||
|
)
|
||||||
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
|
from searx import redisdb
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://lite.duckduckgo.com/lite/',
|
||||||
|
"wikidata_id": 'Q12805',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
send_accept_language_header = True
"""DuckDuckGo-Lite tries to guess the user's preferred language from the HTTP
``Accept-Language`` header.  Optionally the user can select a region filter
(but not a language).
|
||||||
|
"""
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['general', 'web']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = True # user can't select but the results are filtered
|
||||||
|
|
||||||
|
url = 'https://lite.duckduckgo.com/lite/'
|
||||||
|
# url_ping = 'https://duckduckgo.com/t/sl_l'
|
||||||
|
|
||||||
|
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||||
|
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}
|
||||||
|
|
||||||
|
|
||||||
|
def cache_vqd(query, value):
|
||||||
|
"""Caches a ``vqd`` value from a query.
|
||||||
|
|
||||||
|
The vqd value depends on the query string and is needed for the follow up
|
||||||
|
pages or the images loaded by a XMLHttpRequest:
|
||||||
|
|
||||||
|
- DuckDuckGo Web: `https://links.duckduckgo.com/d.js?q=...&vqd=...`
|
||||||
|
- DuckDuckGo Images: `https://duckduckgo.com/i.js??q=...&vqd=...`
|
||||||
|
|
||||||
|
"""
|
||||||
|
c = redisdb.client()
|
||||||
|
if c:
|
||||||
|
logger.debug("cache vqd value: %s", value)
|
||||||
|
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
||||||
|
c.set(key, value, ex=600)
|
||||||
|
|
||||||
|
|
||||||
|
def get_vqd(query, headers):
|
||||||
|
"""Returns the ``vqd`` that fits to the *query*. If there is no ``vqd`` cached
|
||||||
|
(:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
|
||||||
|
response.
|
||||||
|
|
||||||
|
"""
|
||||||
|
value = None
|
||||||
|
c = redisdb.client()
|
||||||
|
if c:
|
||||||
|
key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
|
||||||
|
value = c.get(key)
|
||||||
|
if value:
|
||||||
|
value = value.decode('utf-8')
|
||||||
|
logger.debug("re-use cached vqd value: %s", value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
query_url = 'https://duckduckgo.com/?q={query}&atb=v290-5'.format(query=urlencode({'q': query}))
|
||||||
|
res = get(query_url, headers=headers)
|
||||||
|
content = res.text # type: ignore
|
||||||
|
if content.find('vqd=\"') == -1:
|
||||||
|
raise SearxEngineAPIException('Request failed')
|
||||||
|
value = content[content.find('vqd=\"') + 5 :]
|
||||||
|
value = value[: value.find('\'')]
|
||||||
|
logger.debug("new vqd value: %s", value)
|
||||||
|
cache_vqd(query, value)
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def get_ddg_lang(eng_traits: EngineTraits, sxng_locale, default='en_US'):
|
||||||
|
"""Get DuckDuckGo's language identifier from SearXNG's locale.
|
||||||
|
|
||||||
|
DuckDuckGo defines its languages by region codes (see
:py:obj:`fetch_traits`).
|
||||||
|
|
||||||
|
To get region and language of a DDG service use:
|
||||||
|
|
||||||
|
.. code: python
|
||||||
|
|
||||||
|
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||||
|
eng_lang = get_ddg_lang(traits, params['searxng_locale'])
|
||||||
|
|
||||||
|
It might be confusing, but the ``l`` value of the cookie is what SearXNG calls
the *region*:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
# !ddi paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
|
||||||
|
params['cookies']['ad'] = eng_lang
|
||||||
|
params['cookies']['ah'] = eng_region
|
||||||
|
params['cookies']['l'] = eng_region
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`DDG-lite <https://lite.duckduckgo.com/lite>`__ does not offer a language
|
||||||
|
selection to the user, only a region can be selected by the user
|
||||||
|
(``eng_region`` from the example above). DDG-lite stores the selected
|
||||||
|
region in a cookie::
|
||||||
|
|
||||||
|
params['cookies']['kl'] = eng_region # 'ar-es'
|
||||||
|
|
||||||
|
"""
|
||||||
|
return eng_traits.custom['lang_region'].get( # type: ignore
|
||||||
|
sxng_locale, eng_traits.get_language(sxng_locale, default)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
ddg_reg_map = {
|
||||||
|
'tw-tzh': 'zh_TW',
|
||||||
|
'hk-tzh': 'zh_HK',
|
||||||
|
'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES
|
||||||
|
'es-ca': 'ca_ES',
|
||||||
|
'id-en': 'id_ID',
|
||||||
|
'no-no': 'nb_NO',
|
||||||
|
'jp-jp': 'ja_JP',
|
||||||
|
'kr-kr': 'ko_KR',
|
||||||
|
'xa-ar': 'ar_SA',
|
||||||
|
'sl-sl': 'sl_SI',
|
||||||
|
'th-en': 'th_TH',
|
||||||
|
'vn-en': 'vi_VN',
|
||||||
|
}
|
||||||
|
|
||||||
|
ddg_lang_map = {
|
||||||
|
# use ar --> ar_EG (Egypt's arabic)
|
||||||
|
"ar_DZ": 'lang_region',
|
||||||
|
"ar_JO": 'lang_region',
|
||||||
|
"ar_SA": 'lang_region',
|
||||||
|
# use bn --> bn_BD
|
||||||
|
'bn_IN': 'lang_region',
|
||||||
|
# use de --> de_DE
|
||||||
|
'de_CH': 'lang_region',
|
||||||
|
# use en --> en_US,
|
||||||
|
'en_AU': 'lang_region',
|
||||||
|
'en_CA': 'lang_region',
|
||||||
|
'en_GB': 'lang_region',
|
||||||
|
# Esperanto
|
||||||
|
'eo_XX': 'eo',
|
||||||
|
# use es --> es_ES,
|
||||||
|
'es_AR': 'lang_region',
|
||||||
|
'es_CL': 'lang_region',
|
||||||
|
'es_CO': 'lang_region',
|
||||||
|
'es_CR': 'lang_region',
|
||||||
|
'es_EC': 'lang_region',
|
||||||
|
'es_MX': 'lang_region',
|
||||||
|
'es_PE': 'lang_region',
|
||||||
|
'es_UY': 'lang_region',
|
||||||
|
'es_VE': 'lang_region',
|
||||||
|
# use fr --> rf_FR
|
||||||
|
'fr_CA': 'lang_region',
|
||||||
|
'fr_CH': 'lang_region',
|
||||||
|
'fr_BE': 'lang_region',
|
||||||
|
# use nl --> nl_NL
|
||||||
|
'nl_BE': 'lang_region',
|
||||||
|
# use pt --> pt_PT
|
||||||
|
'pt_BR': 'lang_region',
|
||||||
|
# skip these languages
|
||||||
|
'od_IN': 'skip',
|
||||||
|
'io_XX': 'skip',
|
||||||
|
'tokipona_XX': 'skip',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
# quote ddg bangs
|
||||||
|
query_parts = []
|
||||||
|
# for val in re.split(r'(\s+)', query):
|
||||||
|
for val in re.split(r'(\s+)', query):
|
||||||
|
if not val.strip():
|
||||||
|
continue
|
||||||
|
if val.startswith('!') and external_bang.get_node(external_bang.EXTERNAL_BANGS, val[1:]):
|
||||||
|
val = f"'{val}'"
|
||||||
|
query_parts.append(val)
|
||||||
|
query = ' '.join(query_parts)
|
||||||
|
|
||||||
|
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||||
|
# eng_lang = get_ddg_lang(traits, params['searxng_locale'])
|
||||||
|
|
||||||
|
params['url'] = url
|
||||||
|
params['method'] = 'POST'
|
||||||
|
params['data']['q'] = query
|
||||||
|
|
||||||
|
# The API is not documented, so we do some reverse engineering and emulate
|
||||||
|
# what https://lite.duckduckgo.com/lite/ does when you press "next Page"
|
||||||
|
# link again and again ..
|
||||||
|
|
||||||
|
params['headers']['Content-Type'] = 'application/x-www-form-urlencoded'
|
||||||
|
params['headers']['Referer'] = 'https://google.com/'
|
||||||
|
|
||||||
|
# initial page does not have an offset
|
||||||
|
if params['pageno'] == 2:
|
||||||
|
# second page does have an offset of 30
|
||||||
|
offset = (params['pageno'] - 1) * 30
|
||||||
|
params['data']['s'] = offset
|
||||||
|
params['data']['dc'] = offset + 1
|
||||||
|
|
||||||
|
elif params['pageno'] > 2:
|
||||||
|
# third and following pages do have an offset of 30 + n*50
|
||||||
|
offset = 30 + (params['pageno'] - 2) * 50
|
||||||
|
params['data']['s'] = offset
|
||||||
|
params['data']['dc'] = offset + 1
|
||||||
|
|
||||||
|
# request needs a vqd argument
|
||||||
|
params['data']['vqd'] = get_vqd(query, params["headers"])
|
||||||
|
|
||||||
|
# initial page does not have additional data in the input form
|
||||||
|
if params['pageno'] > 1:
|
||||||
|
|
||||||
|
params['data']['o'] = form_data.get('o', 'json')
|
||||||
|
params['data']['api'] = form_data.get('api', 'd.js')
|
||||||
|
params['data']['nextParams'] = form_data.get('nextParams', '')
|
||||||
|
params['data']['v'] = form_data.get('v', 'l')
|
||||||
|
|
||||||
|
params['data']['kl'] = eng_region
|
||||||
|
params['cookies']['kl'] = eng_region
|
||||||
|
|
||||||
|
params['data']['df'] = ''
|
||||||
|
if params['time_range'] in time_range_dict:
|
||||||
|
params['data']['df'] = time_range_dict[params['time_range']]
|
||||||
|
params['cookies']['df'] = time_range_dict[params['time_range']]
|
||||||
|
|
||||||
|
logger.debug("param data: %s", params['data'])
|
||||||
|
logger.debug("param cookies: %s", params['cookies'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
|
||||||
|
if resp.status_code == 303:
|
||||||
|
return []
|
||||||
|
|
||||||
|
results = []
|
||||||
|
doc = lxml.html.fromstring(resp.text)
|
||||||
|
|
||||||
|
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
||||||
|
|
||||||
|
if len(result_table) == 2:
|
||||||
|
# some locales (at least China) does not have a "next page" button and
|
||||||
|
# the layout of the HTML tables is different.
|
||||||
|
result_table = result_table[1]
|
||||||
|
elif not len(result_table) >= 3:
|
||||||
|
# no more results
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
result_table = result_table[2]
|
||||||
|
# update form data from response
|
||||||
|
form = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table//input/..')
|
||||||
|
if len(form):
|
||||||
|
|
||||||
|
form = form[0]
|
||||||
|
form_data['v'] = eval_xpath(form, '//input[@name="v"]/@value')[0]
|
||||||
|
form_data['api'] = eval_xpath(form, '//input[@name="api"]/@value')[0]
|
||||||
|
form_data['o'] = eval_xpath(form, '//input[@name="o"]/@value')[0]
|
||||||
|
logger.debug('form_data: %s', form_data)
|
||||||
|
|
||||||
|
value = eval_xpath(form, '//input[@name="vqd"]/@value')[0]
|
||||||
|
query = resp.search_params['data']['q']
|
||||||
|
cache_vqd(query, value)
|
||||||
|
|
||||||
|
tr_rows = eval_xpath(result_table, './/tr')
|
||||||
|
# In the last <tr> is the form of the 'previous/next page' links
|
||||||
|
tr_rows = tr_rows[:-1]
|
||||||
|
|
||||||
|
len_tr_rows = len(tr_rows)
|
||||||
|
offset = 0
|
||||||
|
|
||||||
|
while len_tr_rows >= offset + 4:
|
||||||
|
|
||||||
|
# assemble table rows we need to scrap
|
||||||
|
tr_title = tr_rows[offset]
|
||||||
|
tr_content = tr_rows[offset + 1]
|
||||||
|
offset += 4
|
||||||
|
|
||||||
|
# ignore sponsored ads <tr class="result-sponsored">
|
||||||
|
if tr_content.get('class') == 'result-sponsored':
|
||||||
|
continue
|
||||||
|
|
||||||
|
a_tag = eval_xpath_getindex(tr_title, './/td//a[@class="result-link"]', 0, None)
|
||||||
|
if a_tag is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
td_content = eval_xpath_getindex(tr_content, './/td[@class="result-snippet"]', 0, None)
|
||||||
|
if td_content is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'title': a_tag.text_content(),
|
||||||
|
'content': extract_text(td_content),
|
||||||
|
'url': a_tag.get('href'),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
"""Fetch languages & regions from DuckDuckGo.
|
||||||
|
|
||||||
|
SearXNG's ``all`` locale maps DuckDuckGo's "All regions" (``wt-wt``).
DuckDuckGo's language "Browser's preferred language" (``wt_WT``) makes no
sense in a SearXNG request since SearXNG's ``all`` will not add an
``Accept-Language`` HTTP header.  The value in ``engine_traits.all_locale``
is ``wt-wt`` (the region).
|
||||||
|
|
||||||
|
Besides regions, DuckDuckGo also defines its languages by region codes.  For
example, these are the English languages in DuckDuckGo:
|
||||||
|
|
||||||
|
- en_US
|
||||||
|
- en_AU
|
||||||
|
- en_CA
|
||||||
|
- en_GB
|
||||||
|
|
||||||
|
The function :py:obj:`get_ddg_lang` evaluates DuckDuckGo's language from
|
||||||
|
SearXNG's locale.
|
||||||
|
|
||||||
|
"""
|
||||||
|
# pylint: disable=too-many-branches, too-many-statements
|
||||||
|
# fetch regions
|
||||||
|
|
||||||
|
engine_traits.all_locale = 'wt-wt'
|
||||||
|
|
||||||
|
# updated from u588 to u661 / should be updated automatically?
|
||||||
|
resp = get('https://duckduckgo.com/util/u661.js')
|
||||||
|
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
print("ERROR: response from DuckDuckGo is not OK.")
|
||||||
|
|
||||||
|
pos = resp.text.find('regions:{') + 8 # type: ignore
|
||||||
|
js_code = resp.text[pos:] # type: ignore
|
||||||
|
pos = js_code.find('}') + 1
|
||||||
|
regions = json.loads(js_code[:pos])
|
||||||
|
|
||||||
|
for eng_tag, name in regions.items():
|
||||||
|
|
||||||
|
if eng_tag == 'wt-wt':
|
||||||
|
engine_traits.all_locale = 'wt-wt'
|
||||||
|
continue
|
||||||
|
|
||||||
|
region = ddg_reg_map.get(eng_tag)
|
||||||
|
if region == 'skip':
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not region:
|
||||||
|
eng_territory, eng_lang = eng_tag.split('-')
|
||||||
|
region = eng_lang + '_' + eng_territory.upper()
|
||||||
|
|
||||||
|
try:
|
||||||
|
sxng_tag = locales.region_tag(babel.Locale.parse(region))
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
|
||||||
|
continue
|
||||||
|
|
||||||
|
conflict = engine_traits.regions.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_tag:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
|
||||||
|
continue
|
||||||
|
engine_traits.regions[sxng_tag] = eng_tag
|
||||||
|
|
||||||
|
# fetch languages
|
||||||
|
|
||||||
|
engine_traits.custom['lang_region'] = {}
|
||||||
|
|
||||||
|
pos = resp.text.find('languages:{') + 10 # type: ignore
|
||||||
|
js_code = resp.text[pos:] # type: ignore
|
||||||
|
pos = js_code.find('}') + 1
|
||||||
|
js_code = '{"' + js_code[1:pos].replace(':', '":').replace(',', ',"')
|
||||||
|
languages = json.loads(js_code)
|
||||||
|
|
||||||
|
for eng_lang, name in languages.items():
|
||||||
|
|
||||||
|
if eng_lang == 'wt_WT':
|
||||||
|
continue
|
||||||
|
|
||||||
|
babel_tag = ddg_lang_map.get(eng_lang, eng_lang)
|
||||||
|
if babel_tag == 'skip':
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
if babel_tag == 'lang_region':
|
||||||
|
sxng_tag = locales.region_tag(babel.Locale.parse(eng_lang))
|
||||||
|
engine_traits.custom['lang_region'][sxng_tag] = eng_lang
|
||||||
|
continue
|
||||||
|
|
||||||
|
sxng_tag = locales.language_tag(babel.Locale.parse(babel_tag))
|
||||||
|
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: language %s (%s) is unknown by babel" % (name, eng_lang))
|
||||||
|
continue
|
||||||
|
|
||||||
|
conflict = engine_traits.languages.get(sxng_tag)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_lang:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_lang))
|
||||||
|
continue
|
||||||
|
engine_traits.languages[sxng_tag] = eng_lang
|
||||||
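The offsets used in request() are irregular: the first page has none, the second starts at 30, and later pages advance in steps of 50. A small helper that reproduces the scheme described in the comments above (the helper itself is not part of the engine):

def ddg_lite_offset(pageno):
    """Offset ('s' parameter) for a given DuckDuckGo-Lite result page."""
    if pageno <= 1:
        return 0
    if pageno == 2:
        return 30
    return 30 + (pageno - 2) * 50

print([ddg_lite_offset(p) for p in range(1, 6)])  # -> [0, 30, 80, 130, 180]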
+255
@@ -0,0 +1,255 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
DuckDuckGo Instant Answer API
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented, but from
reverse engineering we can see that some services (e.g. instant answers) are
still in use by the DDG search engine.
|
||||||
|
|
||||||
|
As far as we can say, the *instant answers* API does not support languages, or
at least we could not find out how language support should work.  It seems that
most of the features are based on English terms.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from urllib.parse import urlencode, urlparse, urljoin
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.data import WIKIDATA_UNITS
|
||||||
|
from searx.utils import extract_text, html_to_text, get_string_replaces_function
|
||||||
|
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://duckduckgo.com/',
|
||||||
|
"wikidata_id": 'Q12805',
|
||||||
|
"official_api_documentation": 'https://duckduckgo.com/api',
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
|
||||||
|
|
||||||
|
WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/']
|
||||||
|
|
||||||
|
replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
|
||||||
|
|
||||||
|
|
||||||
|
def is_broken_text(text):
|
||||||
|
"""duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``
|
||||||
|
|
||||||
|
The href URL is broken and the "Related website" part may contain some HTML.
|
||||||
|
|
||||||
|
The best solution seems to ignore these results.
|
||||||
|
"""
|
||||||
|
return text.startswith('http') and ' ' in text
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_text(text, htmlResult):
|
||||||
|
# TODO : remove result ending with "Meaning" or "Category" # pylint: disable=fixme
|
||||||
|
result = None
|
||||||
|
dom = html.fromstring(htmlResult)
|
||||||
|
a = dom.xpath('//a')
|
||||||
|
if len(a) >= 1:
|
||||||
|
result = extract_text(a[0])
|
||||||
|
else:
|
||||||
|
result = text
|
||||||
|
if not is_broken_text(result):
|
||||||
|
return result
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = URL.format(query=urlencode({'q': query}))
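# e.g. https://api.duckduckgo.com/?q=searxng&format=json&pretty=0&no_redirect=1&d=1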
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
# pylint: disable=too-many-locals, too-many-branches, too-many-statements
|
||||||
|
results = []
|
||||||
|
|
||||||
|
search_res = resp.json()
|
||||||
|
|
||||||
|
# search_res.get('Entity') possible values (not exhaustive) :
|
||||||
|
# * continent / country / department / location / waterfall
|
||||||
|
# * actor / musician / artist
|
||||||
|
# * book / performing art / film / television / media franchise / concert tour / playwright
|
||||||
|
# * prepared food
|
||||||
|
# * website / software / os / programming language / file format / software engineer
|
||||||
|
# * company
|
||||||
|
|
||||||
|
content = ''
|
||||||
|
heading = search_res.get('Heading', '')
|
||||||
|
attributes = []
|
||||||
|
urls = []
|
||||||
|
infobox_id = None
|
||||||
|
relatedTopics = []
|
||||||
|
|
||||||
|
# add answer if there is one
|
||||||
|
answer = search_res.get('Answer', '')
|
||||||
|
if answer:
|
||||||
|
logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer)
|
||||||
|
if search_res.get('AnswerType') not in ['calc', 'ip']:
|
||||||
|
results.append({'answer': html_to_text(answer)})
|
||||||
|
|
||||||
|
# add infobox
|
||||||
|
if 'Definition' in search_res:
|
||||||
|
content = content + search_res.get('Definition', '')
|
||||||
|
|
||||||
|
if 'Abstract' in search_res:
|
||||||
|
content = content + search_res.get('Abstract', '')
|
||||||
|
|
||||||
|
# image
|
||||||
|
image = search_res.get('Image')
|
||||||
|
image = None if image == '' else image
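# the API sometimes returns a relative image path; make it absolute below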
|
||||||
|
if image is not None and urlparse(image).netloc == '':
|
||||||
|
image = urljoin('https://duckduckgo.com', image)
|
||||||
|
|
||||||
|
# urls
|
||||||
|
# Official website, Wikipedia page
|
||||||
|
for ddg_result in search_res.get('Results', []):
|
||||||
|
firstURL = ddg_result.get('FirstURL')
|
||||||
|
text = ddg_result.get('Text')
|
||||||
|
if firstURL is not None and text is not None:
|
||||||
|
urls.append({'title': text, 'url': firstURL})
|
||||||
|
results.append({'title': heading, 'url': firstURL})
|
||||||
|
|
||||||
|
# related topics
|
||||||
|
for ddg_result in search_res.get('RelatedTopics', []):
|
||||||
|
if 'FirstURL' in ddg_result:
|
||||||
|
firstURL = ddg_result.get('FirstURL')
|
||||||
|
text = ddg_result.get('Text')
|
||||||
|
if not is_broken_text(text):
|
||||||
|
suggestion = result_to_text(text, ddg_result.get('Result'))
|
||||||
|
if suggestion != heading and suggestion is not None:
|
||||||
|
results.append({'suggestion': suggestion})
|
||||||
|
elif 'Topics' in ddg_result:
|
||||||
|
suggestions = []
|
||||||
|
relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions})
|
||||||
|
for topic_result in ddg_result.get('Topics', []):
|
||||||
|
suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result'))
|
||||||
|
if suggestion != heading and suggestion is not None:
|
||||||
|
suggestions.append(suggestion)
|
||||||
|
|
||||||
|
# abstract
|
||||||
|
abstractURL = search_res.get('AbstractURL', '')
|
||||||
|
if abstractURL != '':
|
||||||
|
# add as result? the problem is that it is always in English
|
||||||
|
infobox_id = abstractURL
|
||||||
|
urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True})
|
||||||
|
results.append({'url': abstractURL, 'title': heading})
|
||||||
|
|
||||||
|
# definition
|
||||||
|
definitionURL = search_res.get('DefinitionURL', '')
|
||||||
|
if definitionURL != '':
|
||||||
|
# add as result? as answer? the problem is that it is always in English
|
||||||
|
infobox_id = definitionURL
|
||||||
|
urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})
|
||||||
|
|
||||||
|
# to merge with wikidata's infobox
|
||||||
|
if infobox_id:
|
||||||
|
infobox_id = replace_http_by_https(infobox_id)
|
||||||
|
|
||||||
|
# attributes
|
||||||
|
# some will be converted to urls
|
||||||
|
if 'Infobox' in search_res:
|
||||||
|
infobox = search_res.get('Infobox')
|
||||||
|
if 'content' in infobox:
|
||||||
|
osm_zoom = 17
|
||||||
|
coordinates = None
|
||||||
|
for info in infobox.get('content'):
|
||||||
|
data_type = info.get('data_type')
|
||||||
|
data_label = info.get('label')
|
||||||
|
data_value = info.get('value')
|
||||||
|
|
||||||
|
# Workaround: ddg may return a double quote
|
||||||
|
if data_value == '""':
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Is it an external URL ?
|
||||||
|
# * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
|
||||||
|
# * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
|
||||||
|
# * netflix_id
|
||||||
|
external_url = get_external_url(data_type, data_value)
|
||||||
|
if external_url is not None:
|
||||||
|
urls.append({'title': data_label, 'url': external_url})
|
||||||
|
elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
|
||||||
|
# ignore instance: Wikidata value from "Instance Of" (Qxxxx)
|
||||||
|
# ignore wiki_maps_trigger: reference to a javascript
|
||||||
|
# ignore google_play_artist_id: service shutdown
|
||||||
|
pass
|
||||||
|
elif data_type == 'string' and data_label == 'Website':
|
||||||
|
# There is already an URL for the website
|
||||||
|
pass
|
||||||
|
elif data_type == 'area':
|
||||||
|
attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
|
||||||
|
osm_zoom = area_to_osm_zoom(data_value.get('amount'))
|
||||||
|
elif data_type == 'coordinates':
|
||||||
|
if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
|
||||||
|
# coordinate on Earth
|
||||||
|
# get the zoom information from the area
|
||||||
|
coordinates = info
|
||||||
|
else:
|
||||||
|
# coordinate NOT on Earth
|
||||||
|
attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
|
||||||
|
elif data_type == 'string':
|
||||||
|
attributes.append({'label': data_label, 'value': data_value})
|
||||||
|
|
||||||
|
if coordinates:
|
||||||
|
data_label = coordinates.get('label')
|
||||||
|
data_value = coordinates.get('value')
|
||||||
|
latitude = data_value.get('latitude')
|
||||||
|
longitude = data_value.get('longitude')
|
||||||
|
url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
|
||||||
|
urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})
|
||||||
|
|
||||||
|
if len(heading) > 0:
|
||||||
|
# TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme
|
||||||
|
if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
|
||||||
|
results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
|
||||||
|
else:
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'infobox': heading,
|
||||||
|
'id': infobox_id,
|
||||||
|
'content': content,
|
||||||
|
'img_src': image,
|
||||||
|
'attributes': attributes,
|
||||||
|
'urls': urls,
|
||||||
|
'relatedTopics': relatedTopics,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def unit_to_str(unit):
|
||||||
|
for prefix in WIKIDATA_PREFIX:
|
||||||
|
if unit.startswith(prefix):
|
||||||
|
wikidata_entity = unit[len(prefix) :]
|
||||||
|
return WIKIDATA_UNITS.get(wikidata_entity, unit)
|
||||||
|
return unit
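# e.g. unit_to_str('http://www.wikidata.org/entity/Q712226') looks up 'Q712226'
# (square kilometre) in WIKIDATA_UNITS and falls back to the raw value if unknown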
|
||||||
|
|
||||||
|
|
||||||
|
def area_to_str(area):
|
||||||
|
"""parse ``{'unit': 'https://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}``"""
|
||||||
|
unit = unit_to_str(area.get('unit'))
|
||||||
|
if unit is not None:
|
||||||
|
try:
|
||||||
|
amount = float(area.get('amount'))
|
||||||
|
return '{} {}'.format(amount, unit)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return '{} {}'.format(area.get('amount', ''), area.get('unit', ''))
|
||||||
Executable
+100
@@ -0,0 +1,100 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
DuckDuckGo Images
|
||||||
|
~~~~~~~~~~~~~~~~~
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
|
||||||
|
from searx.engines.duckduckgo import (
|
||||||
|
get_ddg_lang,
|
||||||
|
get_vqd,
|
||||||
|
)
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://duckduckgo.com/',
|
||||||
|
"wikidata_id": 'Q12805',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON (site requires js to get images)',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['images', 'web']
|
||||||
|
paging = True
|
||||||
|
safesearch = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
safesearch_cookies = {0: '-2', 1: None, 2: '1'}
|
||||||
|
safesearch_args = {0: '1', 1: None, 2: '1'}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||||
|
eng_lang = get_ddg_lang(traits, params['searxng_locale'])
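# DDG's image endpoint additionally requires a 'vqd' token, which is obtained via
# get_vqd() from the duckduckgo module and passed along with the query below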
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'q': query,
|
||||||
|
'o': 'json',
|
||||||
|
# 'u': 'bing',
|
||||||
|
'l': eng_region,
|
||||||
|
'vqd': get_vqd(query, params["headers"]),
|
||||||
|
}
|
||||||
|
|
||||||
|
if params['pageno'] > 1:
|
||||||
|
args['s'] = (params['pageno'] - 1) * 100
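# 's' is the zero-based offset of the first result; each page seems to hold up to 100 images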
|
||||||
|
|
||||||
|
params['cookies']['ad'] = eng_lang # zh_CN
|
||||||
|
params['cookies']['ah'] = eng_region # "us-en,de-de"
|
||||||
|
params['cookies']['l'] = eng_region # "hk-tzh"
|
||||||
|
logger.debug("cookies: %s", params['cookies'])
|
||||||
|
|
||||||
|
safe_search = safesearch_cookies.get(params['safesearch'])
|
||||||
|
if safe_search is not None:
|
||||||
|
params['cookies']['p'] = safe_search # "-2", "1"
|
||||||
|
safe_search = safesearch_args.get(params['safesearch'])
|
||||||
|
if safe_search is not None:
|
||||||
|
args['p'] = safe_search # "-1", "1"
|
||||||
|
|
||||||
|
args = urlencode(args)
|
||||||
|
params['url'] = 'https://duckduckgo.com/i.js?{args}&f={f}'.format(args=args, f=',,,,,')
|
||||||
|
|
||||||
|
params['headers']['Accept'] = 'application/json, text/javascript, */*; q=0.01'
|
||||||
|
params['headers']['Referer'] = 'https://duckduckgo.com/'
|
||||||
|
params['headers']['X-Requested-With'] = 'XMLHttpRequest'
|
||||||
|
logger.debug("headers: %s", params['headers'])
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
res_json = resp.json()
|
||||||
|
|
||||||
|
for result in res_json['results']:
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'template': 'images.html',
|
||||||
|
'title': result['title'],
|
||||||
|
'content': '',
|
||||||
|
'thumbnail_src': result['thumbnail'],
|
||||||
|
'img_src': result['image'],
|
||||||
|
'url': result['url'],
|
||||||
|
'img_format': '%s x %s' % (result['width'], result['height']),
|
||||||
|
'source': result['source'],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+163
@@ -0,0 +1,163 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""
|
||||||
|
DuckDuckGo Weather
|
||||||
|
~~~~~~~~~~~~~~~~~~
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from json import loads
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from flask_babel import gettext
|
||||||
|
|
||||||
|
from searx.engines.duckduckgo import fetch_traits # pylint: disable=unused-import
|
||||||
|
from searx.engines.duckduckgo import get_ddg_lang
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://duckduckgo.com/',
|
||||||
|
"wikidata_id": 'Q12805',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": "JSON",
|
||||||
|
}
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ["weather"]
|
||||||
|
URL = "https://duckduckgo.com/js/spice/forecast/{query}/{lang}"
|
||||||
|
|
||||||
|
|
||||||
|
def generate_condition_table(condition):
|
||||||
|
res = ""
|
||||||
|
|
||||||
|
res += f"<tr><td><b>{gettext('Condition')}</b></td>" f"<td><b>{condition['summary']}</b></td></tr>"
|
||||||
|
|
||||||
|
res += (
|
||||||
|
f"<tr><td><b>{gettext('Temperature')}</b></td>"
|
||||||
|
f"<td><b>{f_to_c(condition['temperature'])}°C / {condition['temperature']}°F</b></td></tr>"
|
||||||
|
)
|
||||||
|
|
||||||
|
res += (
|
||||||
|
f"<tr><td>{gettext('Feels like')}</td><td>{f_to_c(condition['apparentTemperature'])}°C / "
|
||||||
|
f"{condition['apparentTemperature']}°F</td></tr>"
|
||||||
|
)
|
||||||
|
|
||||||
|
res += (
|
||||||
|
f"<tr><td>{gettext('Wind')}</td><td>{condition['windBearing']}° — "
|
||||||
|
f"{(condition['windSpeed'] * 1.6093440006147):.2f} km/h / {condition['windSpeed']} mph</td></tr>"
|
||||||
|
)
|
||||||
|
|
||||||
|
res += f"<tr><td>{gettext('Visibility')}</td><td>{condition['visibility']} km</td>"
|
||||||
|
|
||||||
|
res += f"<tr><td>{gettext('Humidity')}</td><td>{(condition['humidity'] * 100):.1f}%</td></tr>"
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def generate_day_table(day):
|
||||||
|
res = ""
|
||||||
|
|
||||||
|
res += (
|
||||||
|
f"<tr><td>{gettext('Min temp.')}</td><td>{f_to_c(day['temperatureLow'])}°C / "
|
||||||
|
f"{day['temperatureLow']}°F</td></tr>"
|
||||||
|
)
|
||||||
|
res += (
|
||||||
|
f"<tr><td>{gettext('Max temp.')}</td><td>{f_to_c(day['temperatureHigh'])}°C / "
|
||||||
|
f"{day['temperatureHigh']}°F</td></tr>"
|
||||||
|
)
|
||||||
|
res += f"<tr><td>{gettext('UV index')}</td><td>{day['uvIndex']}</td></tr>"
|
||||||
|
res += (
|
||||||
|
f"<tr><td>{gettext('Sunrise')}</td><td>{datetime.fromtimestamp(day['sunriseTime']).strftime('%H:%M')}</td></tr>"
|
||||||
|
)
|
||||||
|
res += (
|
||||||
|
f"<tr><td>{gettext('Sunset')}</td><td>{datetime.fromtimestamp(day['sunsetTime']).strftime('%H:%M')}</td></tr>"
|
||||||
|
)
|
||||||
|
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
eng_region = traits.get_region(params['searxng_locale'], traits.all_locale)
|
||||||
|
eng_lang = get_ddg_lang(traits, params['searxng_locale'])
|
||||||
|
|
||||||
|
# !ddw paris :es-AR --> {'ad': 'es_AR', 'ah': 'ar-es', 'l': 'ar-es'}
|
||||||
|
params['cookies']['ad'] = eng_lang
|
||||||
|
params['cookies']['ah'] = eng_region
|
||||||
|
params['cookies']['l'] = eng_region
|
||||||
|
logger.debug("cookies: %s", params['cookies'])
|
||||||
|
|
||||||
|
params["url"] = URL.format(query=quote(query), lang=eng_lang.split('_')[0])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def f_to_c(temperature):
|
||||||
|
return "%.2f" % ((temperature - 32) / 1.8)
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
if resp.text.strip() == "ddg_spice_forecast();":
|
||||||
|
return []
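# the endpoint answers with a JSONP-like wrapper ``ddg_spice_forecast({ ... });``;
# the slicing below cuts the wrapper off and keeps only the JSON payload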
|
||||||
|
|
||||||
|
result = loads(resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2])
|
||||||
|
|
||||||
|
current = result["currently"]
|
||||||
|
|
||||||
|
title = result['flags']['ddg-location']
|
||||||
|
|
||||||
|
infobox = f"<h3>{gettext('Current condition')}</h3><table><tbody>"
|
||||||
|
|
||||||
|
infobox += generate_condition_table(current)
|
||||||
|
|
||||||
|
infobox += "</tbody></table>"
|
||||||
|
|
||||||
|
last_date = None
|
||||||
|
|
||||||
|
for time in result['hourly']['data']:
|
||||||
|
current_time = datetime.fromtimestamp(time['time'])
|
||||||
|
|
||||||
|
if last_date != current_time.date():
|
||||||
|
if last_date is not None:
|
||||||
|
infobox += "</tbody></table>"
|
||||||
|
|
||||||
|
infobox += f"<h3>{current_time.strftime('%Y-%m-%d')}</h3>"
|
||||||
|
|
||||||
|
infobox += "<table><tbody>"
|
||||||
|
|
||||||
|
for day in result['daily']['data']:
|
||||||
|
if datetime.fromtimestamp(day['time']).date() == current_time.date():
|
||||||
|
infobox += generate_day_table(day)
|
||||||
|
|
||||||
|
infobox += "</tbody></table><table><tbody>"
|
||||||
|
|
||||||
|
last_date = current_time.date()
|
||||||
|
|
||||||
|
infobox += f"<tr><td rowspan=\"7\"><b>{current_time.strftime('%H:%M')}</b></td></tr>"
|
||||||
|
|
||||||
|
infobox += generate_condition_table(time)
|
||||||
|
|
||||||
|
infobox += "</tbody></table>"
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"infobox": title,
|
||||||
|
"content": infobox,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+83
@@ -0,0 +1,83 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Duden
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urllib.parse import quote, urljoin
|
||||||
|
from lxml import html
|
||||||
|
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
||||||
|
from searx.network import raise_for_httperror
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.duden.de',
|
||||||
|
"wikidata_id": 'Q73624591',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
"language": 'de',
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = ['dictionaries']
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
# search-url
|
||||||
|
base_url = 'https://www.duden.de/'
|
||||||
|
search_url = base_url + 'suchen/dudenonline/{query}?search_api_fulltext=&page={offset}'
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
'''pre-request callback
|
||||||
|
params<dict>:
|
||||||
|
method : POST/GET
|
||||||
|
headers : {}
|
||||||
|
data : {} # if method == POST
|
||||||
|
url : ''
|
||||||
|
category: 'search category'
|
||||||
|
pageno : 1 # number of the requested page
|
||||||
|
'''
|
||||||
|
|
||||||
|
offset = params['pageno'] - 1
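# Duden counts pages from 0; the first page is requested without a page parameter,
# every following page via '&page={offset}'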
|
||||||
|
if offset == 0:
|
||||||
|
search_url_fmt = base_url + 'suchen/dudenonline/{query}'
|
||||||
|
params['url'] = search_url_fmt.format(query=quote(query))
|
||||||
|
else:
|
||||||
|
params['url'] = search_url.format(offset=offset, query=quote(query))
|
||||||
|
# after the last page of results, spelling corrections are returned after an HTTP redirect,
|
||||||
|
# whatever the page number is
|
||||||
|
params['soft_max_redirects'] = 1
|
||||||
|
params['raise_for_httperror'] = False
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
'''post-response callback
|
||||||
|
resp: requests response object
|
||||||
|
'''
|
||||||
|
results = []
|
||||||
|
|
||||||
|
if resp.status_code == 404:
|
||||||
|
return results
|
||||||
|
|
||||||
|
raise_for_httperror(resp)
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
number_of_results_element = eval_xpath_getindex(
|
||||||
|
dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None
|
||||||
|
)
|
||||||
|
if number_of_results_element is not None:
|
||||||
|
number_of_results_string = re.sub('[^0-9]', '', number_of_results_element)
|
||||||
|
results.append({'number_of_results': int(number_of_results_string)})
|
||||||
|
|
||||||
|
for result in eval_xpath_list(dom, '//section[not(contains(@class, "essay"))]'):
|
||||||
|
url = eval_xpath_getindex(result, './/h2/a', 0).get('href')
|
||||||
|
url = urljoin(base_url, url)
|
||||||
|
title = eval_xpath(result, 'string(.//h2/a)').strip()
|
||||||
|
content = extract_text(eval_xpath(result, './/p'))
|
||||||
|
# append result
|
||||||
|
results.append({'url': url, 'title': title, 'content': content})
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+22
@@ -0,0 +1,22 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dummy Offline
"""


# about
about = {
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}


def search(query, request_params):
    return [
        {
            'result': 'this is what you get',
        }
    ]
Executable
+24
@@ -0,0 +1,24 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Dummy
"""

# about
about = {
    "website": None,
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'empty array',
}


# do search-request
def request(query, params):
    return params


# get response from search-request
def response(resp):
    return []
Executable
+76
@@ -0,0 +1,76 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Ebay (Shopping)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from lxml import html
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.ebay.com',
|
||||||
|
"wikidata_id": 'Q58024',
|
||||||
|
"official_api_documentation": 'https://developer.ebay.com/',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = ['shopping']
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
# Set base_url in settings.yml in order to
|
||||||
|
# have the desired local TLD.
|
||||||
|
base_url = None
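# e.g. base_url: 'https://www.ebay.com' (or a local TLD such as 'https://www.ebay.de')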
|
||||||
|
search_url = '/sch/i.html?_nkw={query}&_sacat={pageno}'
|
||||||
|
|
||||||
|
results_xpath = '//li[contains(@class, "s-item")]'
|
||||||
|
url_xpath = './/a[@class="s-item__link"]/@href'
|
||||||
|
title_xpath = './/h3[@class="s-item__title"]'
|
||||||
|
content_xpath = './/div[@span="SECONDARY_INFO"]'
|
||||||
|
price_xpath = './/div[contains(@class, "s-item__detail")]/span[@class="s-item__price"][1]/text()'
|
||||||
|
shipping_xpath = './/span[contains(@class, "s-item__shipping")]/text()'
|
||||||
|
source_country_xpath = './/span[contains(@class, "s-item__location")]/text()'
|
||||||
|
thumbnail_xpath = './/img[@class="s-item__image-img"]/@src'
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = f'{base_url}' + search_url.format(query=quote(query), pageno=params['pageno'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
results_dom = dom.xpath(results_xpath)
|
||||||
|
if not results_dom:
|
||||||
|
return []
|
||||||
|
|
||||||
|
for result_dom in results_dom:
|
||||||
|
url = extract_text(result_dom.xpath(url_xpath))
|
||||||
|
title = extract_text(result_dom.xpath(title_xpath))
|
||||||
|
content = extract_text(result_dom.xpath(content_xpath))
|
||||||
|
price = extract_text(result_dom.xpath(price_xpath))
|
||||||
|
shipping = extract_text(result_dom.xpath(shipping_xpath))
|
||||||
|
source_country = extract_text(result_dom.xpath(source_country_xpath))
|
||||||
|
thumbnail = extract_text(result_dom.xpath(thumbnail_xpath))
|
||||||
|
|
||||||
|
if title == "":
|
||||||
|
continue
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'content': content,
|
||||||
|
'price': price,
|
||||||
|
'shipping': shipping,
|
||||||
|
'source_country': source_country,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'template': 'products.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+178
@@ -0,0 +1,178 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
""".. sidebar:: info
|
||||||
|
|
||||||
|
- :origin:`elasticsearch.py <searx/engines/elasticsearch.py>`
|
||||||
|
- `Elasticsearch <https://www.elastic.co/elasticsearch/>`_
|
||||||
|
- `Elasticsearch Guide
|
||||||
|
<https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html>`_
|
||||||
|
- `Install Elasticsearch
|
||||||
|
<https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html>`_
|
||||||
|
|
||||||
|
Elasticsearch_ supports numerous ways to query the data it is storing. At the
|
||||||
|
moment the engine supports the most popular search methods (``query_type``):
|
||||||
|
|
||||||
|
- ``match``,
|
||||||
|
- ``simple_query_string``,
|
||||||
|
- ``term`` and
|
||||||
|
- ``terms``.
|
||||||
|
|
||||||
|
If none of the methods fit your use case, you can select ``custom`` query type
|
||||||
|
and provide the JSON payload to submit to Elasticsearch in
|
||||||
|
``custom_query_json``.
|
||||||
|
|
||||||
|
Example
|
||||||
|
=======
|
||||||
|
|
||||||
|
The following is an example configuration for an Elasticsearch_ instance with
|
||||||
|
authentication configured to read from ``my-index`` index.
|
||||||
|
|
||||||
|
.. code:: yaml
|
||||||
|
|
||||||
|
- name: elasticsearch
|
||||||
|
shortcut: es
|
||||||
|
engine: elasticsearch
|
||||||
|
base_url: http://localhost:9200
|
||||||
|
username: elastic
|
||||||
|
password: changeme
|
||||||
|
index: my-index
|
||||||
|
query_type: match
|
||||||
|
# custom_query_json: '{ ... }'
|
||||||
|
enable_http: true
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import loads, dumps
|
||||||
|
from searx.exceptions import SearxEngineAPIException
|
||||||
|
|
||||||
|
|
||||||
|
base_url = 'http://localhost:9200'
|
||||||
|
username = ''
|
||||||
|
password = ''
|
||||||
|
index = ''
|
||||||
|
search_url = base_url + '/' + index + '/_search'
|
||||||
|
query_type = 'match'
|
||||||
|
custom_query_json = {}
|
||||||
|
show_metadata = False
|
||||||
|
categories = ['general']
|
||||||
|
|
||||||
|
|
||||||
|
def init(engine_settings):
|
||||||
|
if 'query_type' in engine_settings and engine_settings['query_type'] not in _available_query_types:
|
||||||
|
raise ValueError('unsupported query type', engine_settings['query_type'])
|
||||||
|
|
||||||
|
if index == '':
|
||||||
|
raise ValueError('index cannot be empty')
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
if query_type not in _available_query_types:
|
||||||
|
return params
|
||||||
|
|
||||||
|
if username and password:
|
||||||
|
params['auth'] = (username, password)
|
||||||
|
|
||||||
|
params['url'] = search_url
|
||||||
|
params['method'] = 'GET'
|
||||||
|
params['data'] = dumps(_available_query_types[query_type](query))
|
||||||
|
params['headers']['Content-Type'] = 'application/json'
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def _match_query(query):
|
||||||
|
"""
|
||||||
|
The standard for full text queries.
|
||||||
|
searx format: "key:value" e.g. city:berlin
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
key, value = query.split(':')
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError('query format must be "key:value"') from e
|
||||||
|
|
||||||
|
return {"query": {"match": {key: {'query': value}}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _simple_query_string_query(query):
|
||||||
|
"""
|
||||||
|
Accepts query strings, but it is less strict than query_string
|
||||||
|
The field used can be specified in index.query.default_field in Elasticsearch.
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
return {'query': {'simple_query_string': {'query': query}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _term_query(query):
|
||||||
|
"""
|
||||||
|
Accepts one term and the name of the field.
|
||||||
|
searx format: "key:value" e.g. city:berlin
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-term-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
key, value = query.split(':')
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError('query format must be key:value') from e
|
||||||
|
|
||||||
|
return {'query': {'term': {key: value}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _terms_query(query):
|
||||||
|
"""
|
||||||
|
Accepts multiple terms and the name of the field.
|
||||||
|
searx format: "key:value1,value2" e.g. city:berlin,paris
|
||||||
|
REF: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-terms-query.html
|
||||||
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
key, values = query.split(':')
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError('query format must be key:value1,value2') from e
|
||||||
|
|
||||||
|
return {'query': {'terms': {key: values.split(',')}}}
|
||||||
|
|
||||||
|
|
||||||
|
def _custom_query(query):
    # substitute the '{{KEY}}' / '{{VALUE}}' placeholders of the configured
    # custom_query_json with the two parts of a "key:value" query; the
    # substituted pairs are collected into a new dict so custom_query_json
    # itself is not modified while iterating over it
    key, value = query.split(':')
    custom_query = {}
    for query_key, query_value in custom_query_json.items():
        if query_key == '{{KEY}}':
            query_key = key
        if query_value == '{{VALUE}}':
            query_value = value
        custom_query[query_key] = query_value
    return custom_query
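# e.g. with custom_query_json = {'{{KEY}}': '{{VALUE}}'} a search for 'city:berlin'
# yields the query payload {'city': 'berlin'}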
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
resp_json = loads(resp.text)
|
||||||
|
if 'error' in resp_json:
|
||||||
|
raise SearxEngineAPIException(resp_json['error'])
|
||||||
|
|
||||||
|
for result in resp_json['hits']['hits']:
|
||||||
|
r = {key: str(value) if not key.startswith('_') else value for key, value in result['_source'].items()}
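# stringify the '_source' fields for the key-value template; keys starting with
# an underscore keep their original type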
|
||||||
|
r['template'] = 'key-value.html'
|
||||||
|
|
||||||
|
if show_metadata:
|
||||||
|
r['metadata'] = {'index': result['_index'], 'id': result['_id'], 'score': result['_score']}
|
||||||
|
|
||||||
|
results.append(r)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
_available_query_types = {
|
||||||
|
# Full text queries
|
||||||
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html
|
||||||
|
'match': _match_query,
|
||||||
|
'simple_query_string': _simple_query_string_query,
|
||||||
|
# Term-level queries
|
||||||
|
# https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html
|
||||||
|
'term': _term_query,
|
||||||
|
'terms': _terms_query,
|
||||||
|
# Query JSON defined by the instance administrator.
|
||||||
|
'custom': _custom_query,
|
||||||
|
}
|
||||||
Executable
+67
@@ -0,0 +1,67 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Emojipedia
|
||||||
|
|
||||||
|
Emojipedia is an emoji reference website which documents the meaning and
|
||||||
|
common usage of emoji characters in the Unicode Standard. It has been owned by
Zedge since 2021. Emojipedia is a voting member of The Unicode Consortium.[1]
|
||||||
|
|
||||||
|
[1] https://en.wikipedia.org/wiki/Emojipedia
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.utils import (
|
||||||
|
eval_xpath_list,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
extract_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://emojipedia.org',
|
||||||
|
"wikidata_id": 'Q22908129',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = []
|
||||||
|
paging = False
|
||||||
|
time_range_support = False
|
||||||
|
|
||||||
|
base_url = 'https://emojipedia.org'
|
||||||
|
search_url = base_url + '/search/?{query}'
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = search_url.format(
|
||||||
|
query=urlencode({'q': query}),
|
||||||
|
)
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for result in eval_xpath_list(dom, "//ol[@class='search-results']/li"):
|
||||||
|
|
||||||
|
extracted_desc = extract_text(eval_xpath_getindex(result, './/p', 0))
|
||||||
|
|
||||||
|
if 'No results found.' in extracted_desc:
|
||||||
|
break
|
||||||
|
|
||||||
|
link = eval_xpath_getindex(result, './/h2/a', 0)
|
||||||
|
|
||||||
|
url = base_url + link.attrib.get('href')
|
||||||
|
title = extract_text(link)
|
||||||
|
content = extracted_desc
|
||||||
|
|
||||||
|
res = {'url': url, 'title': title, 'content': content}
|
||||||
|
|
||||||
|
results.append(res)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+54
@@ -0,0 +1,54 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
F-Droid (a repository of FOSS applications for Android)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
from searx.utils import extract_text
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://f-droid.org/',
|
||||||
|
"wikidata_id": 'Q1386210',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['files', 'apps']
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
# search-url
|
||||||
|
base_url = 'https://search.f-droid.org/'
|
||||||
|
search_url = base_url + '?{query}'
|
||||||
|
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
|
||||||
|
query = urlencode({'q': query, 'page': params['pageno'], 'lang': ''})
|
||||||
|
params['url'] = search_url.format(query=query)
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for app in dom.xpath('//a[@class="package-header"]'):
|
||||||
|
app_url = app.xpath('./@href')[0]
|
||||||
|
app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()'))
|
||||||
|
app_content = (
|
||||||
|
extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip()
|
||||||
|
+ ' - '
|
||||||
|
+ extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip()
|
||||||
|
)
|
||||||
|
app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0]
|
||||||
|
|
||||||
|
results.append({'url': app_url, 'title': app_title, 'content': app_content, 'img_src': app_img_src})
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+97
@@ -0,0 +1,97 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Flickr (Images)
|
||||||
|
|
||||||
|
More info on api-key : https://www.flickr.com/services/apps/create/
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.flickr.com',
|
||||||
|
"wikidata_id": 'Q103204',
|
||||||
|
"official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": True,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = ['images']
|
||||||
|
|
||||||
|
nb_per_page = 15
|
||||||
|
paging = True
|
||||||
|
api_key = None
|
||||||
|
|
||||||
|
|
||||||
|
url = (
|
||||||
|
'https://api.flickr.com/services/rest/?method=flickr.photos.search'
|
||||||
|
+ '&api_key={api_key}&{text}&sort=relevance'
|
||||||
|
+ '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z'
|
||||||
|
+ '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
|
||||||
|
)
|
||||||
|
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
|
||||||
|
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
|
||||||
|
def build_flickr_url(user_id, photo_id):
|
||||||
|
return photo_url.format(userid=user_id, photoid=photo_id)
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = url.format(
|
||||||
|
text=urlencode({'text': query}), api_key=api_key, nb_per_page=nb_per_page, page=params['pageno']
|
||||||
|
)
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
search_results = loads(resp.text)
|
||||||
|
|
||||||
|
# return empty array if there are no results
|
||||||
|
if 'photos' not in search_results:
|
||||||
|
return []
|
||||||
|
|
||||||
|
if 'photo' not in search_results['photos']:
|
||||||
|
return []
|
||||||
|
|
||||||
|
photos = search_results['photos']['photo']
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for photo in photos:
|
||||||
|
if 'url_o' in photo:
|
||||||
|
img_src = photo['url_o']
|
||||||
|
elif 'url_z' in photo:
|
||||||
|
img_src = photo['url_z']
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# pick a thumbnail: prefer the small 'url_n', fall back to 'url_z', else use the full image
|
||||||
|
if 'url_n' in photo:
|
||||||
|
thumbnail_src = photo['url_n']
|
||||||
|
elif 'url_z' in photo:
|
||||||
|
thumbnail_src = photo['url_z']
|
||||||
|
else:
|
||||||
|
thumbnail_src = img_src
|
||||||
|
|
||||||
|
url = build_flickr_url(photo['owner'], photo['id'])
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'title': photo['title'],
|
||||||
|
'img_src': img_src,
|
||||||
|
'thumbnail_src': thumbnail_src,
|
||||||
|
'content': photo['description']['_content'],
|
||||||
|
'author': photo['ownername'],
|
||||||
|
'template': 'images.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
Executable
+143
@@ -0,0 +1,143 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Flickr (Images)
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import json
|
||||||
|
from time import time
|
||||||
|
import re
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from searx.utils import ecma_unescape, html_to_text
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.flickr.com',
|
||||||
|
"wikidata_id": 'Q103204',
|
||||||
|
"official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['images']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = False
|
||||||
|
|
||||||
|
time_range_dict = {
|
||||||
|
'day': 60 * 60 * 24,
|
||||||
|
'week': 60 * 60 * 24 * 7,
|
||||||
|
'month': 60 * 60 * 24 * 7 * 4,
|
||||||
|
'year': 60 * 60 * 24 * 7 * 52,
|
||||||
|
}
|
||||||
|
image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'm', 'n', 't', 'q', 's')
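# flickr size suffixes, ordered from the largest ('o' = original) down to the
# smallest; the loop in response() picks the first size that is available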
|
||||||
|
|
||||||
|
search_url = 'https://www.flickr.com/search?{query}&page={page}'
|
||||||
|
time_range_url = '&min_upload_date={start}&max_upload_date={end}'
|
||||||
|
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
|
||||||
|
modelexport_re = re.compile(r"^\s*modelExport:\s*({.*}),$", re.M)
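# the search page embeds its photo data in a javascript assignment of the form
# ``modelExport: { ... },``; the regex above captures that object for json.loads()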
|
||||||
|
|
||||||
|
|
||||||
|
def build_flickr_url(user_id, photo_id):
|
||||||
|
return photo_url.format(userid=user_id, photoid=photo_id)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_time_range_url(time_range):
|
||||||
|
if time_range in time_range_dict:
|
||||||
|
return time_range_url.format(start=time(), end=str(int(time()) - time_range_dict[time_range]))
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = search_url.format(query=urlencode({'text': query}), page=params['pageno']) + _get_time_range_url(
|
||||||
|
params['time_range']
|
||||||
|
)
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp): # pylint: disable=too-many-branches
|
||||||
|
results = []
|
||||||
|
|
||||||
|
matches = modelexport_re.search(resp.text)
|
||||||
|
if matches is None:
|
||||||
|
return results
|
||||||
|
|
||||||
|
match = matches.group(1)
|
||||||
|
model_export = json.loads(match)
|
||||||
|
|
||||||
|
if 'legend' not in model_export:
|
||||||
|
return results
|
||||||
|
legend = model_export['legend']
|
||||||
|
|
||||||
|
# handle empty page
|
||||||
|
if not legend or not legend[0]:
|
||||||
|
return results
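# each entry of 'legend' is an 8-element index path pointing at one photo object
# nested inside model_export['main']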
|
||||||
|
|
||||||
|
for x, index in enumerate(legend):
|
||||||
|
if len(index) != 8:
|
||||||
|
logger.debug("skip legend enty %s : %s", x, index)
|
||||||
|
continue
|
||||||
|
|
||||||
|
photo = model_export['main'][index[0]][int(index[1])][index[2]][index[3]][index[4]][index[5]][int(index[6])][
|
||||||
|
index[7]
|
||||||
|
]
|
||||||
|
author = ecma_unescape(photo.get('realname', ''))
|
||||||
|
source = ecma_unescape(photo.get('username', ''))
|
||||||
|
if source:
|
||||||
|
source += ' @ Flickr'
|
||||||
|
title = ecma_unescape(photo.get('title', ''))
|
||||||
|
content = html_to_text(ecma_unescape(photo.get('description', '')))
|
||||||
|
img_src = None
|
||||||
|
|
||||||
|
# From the biggest to the lowest format
|
||||||
|
size_data = None
|
||||||
|
for image_size in image_sizes:
|
||||||
|
if image_size in photo['sizes']['data']:
|
||||||
|
size_data = photo['sizes']['data'][image_size]['data']
|
||||||
|
break
|
||||||
|
|
||||||
|
if not size_data:
|
||||||
|
logger.debug('cannot find valid image size: {0}'.format(repr(photo['sizes']['data'])))
|
||||||
|
continue
|
||||||
|
|
||||||
|
img_src = size_data['url']
|
||||||
|
img_format = f"{size_data['width']} x {size_data['height']}"
|
||||||
|
|
||||||
|
# pick a thumbnail: prefer the small 'n' size, fall back to 'z', else use the full image
|
||||||
|
if 'n' in photo['sizes']['data']:
|
||||||
|
thumbnail_src = photo['sizes']['data']['n']['data']['url']
|
||||||
|
elif 'z' in photo['sizes']['data']:
|
||||||
|
thumbnail_src = photo['sizes']['data']['z']['data']['url']
|
||||||
|
else:
|
||||||
|
thumbnail_src = img_src
|
||||||
|
|
||||||
|
if 'ownerNsid' not in photo:
|
||||||
|
# should not happen, disowned photo? Show it anyway
|
||||||
|
url = img_src
|
||||||
|
else:
|
||||||
|
url = build_flickr_url(photo['ownerNsid'], photo['id'])
|
||||||
|
|
||||||
|
result = {
|
||||||
|
'url': url,
|
||||||
|
'img_src': img_src,
|
||||||
|
'thumbnail_src': thumbnail_src,
|
||||||
|
'source': source,
|
||||||
|
'img_format': img_format,
|
||||||
|
'template': 'images.html',
|
||||||
|
}
|
||||||
|
result['author'] = author.encode(errors='ignore').decode()
|
||||||
|
result['source'] = source.encode(errors='ignore').decode()
|
||||||
|
result['title'] = title.encode(errors='ignore').decode()
|
||||||
|
result['content'] = content.encode(errors='ignore').decode()
|
||||||
|
results.append(result)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+68
@@ -0,0 +1,68 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
FramaLibre (IT)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from html import escape
|
||||||
|
from urllib.parse import urljoin, urlencode
|
||||||
|
from lxml import html
|
||||||
|
from searx.utils import extract_text
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://framalibre.org/',
|
||||||
|
"wikidata_id": 'Q30213882',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['it']
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
# search-url
|
||||||
|
base_url = 'https://framalibre.org/'
|
||||||
|
search_url = base_url + 'recherche-par-crit-res?{query}&page={offset}'
|
||||||
|
|
||||||
|
# specific xpath variables
|
||||||
|
results_xpath = '//div[@class="nodes-list-row"]/div[contains(@typeof,"sioc:Item")]'
|
||||||
|
link_xpath = './/h3[@class="node-title"]/a[@href]'
|
||||||
|
thumbnail_xpath = './/img[@class="media-object img-responsive"]/@src'
|
||||||
|
content_xpath = './/div[@class="content"]//p'
|
||||||
|
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
|
||||||
|
offset = params['pageno'] - 1
|
||||||
|
params['url'] = search_url.format(query=urlencode({'keys': query}), offset=offset)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for result in dom.xpath(results_xpath):
|
||||||
|
link = result.xpath(link_xpath)[0]
|
||||||
|
href = urljoin(base_url, link.attrib.get('href'))
|
||||||
|
# there's also a span (class="rdf-meta element-hidden" property="dc:title")'s content property for this...
|
||||||
|
title = escape(extract_text(link))
|
||||||
|
thumbnail_tags = result.xpath(thumbnail_xpath)
|
||||||
|
thumbnail = None
|
||||||
|
if len(thumbnail_tags) > 0:
|
||||||
|
thumbnail = extract_text(thumbnail_tags[0])
|
||||||
|
if thumbnail[0] == '/':
|
||||||
|
thumbnail = base_url + thumbnail
|
||||||
|
content = escape(extract_text(result.xpath(content_xpath)))
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append({'url': href, 'title': title, 'img_src': thumbnail, 'content': content})
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
Executable
+64
@@ -0,0 +1,64 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Freesound (Sound)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
disabled = True
|
||||||
|
api_key = ""
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": "https://freesound.org",
|
||||||
|
"wikidata_id": "Q835703",
|
||||||
|
"official_api_documentation": "https://freesound.org/docs/api",
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": True,
|
||||||
|
"results": "JSON",
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
paging = True
|
||||||
|
|
||||||
|
# search url
|
||||||
|
url = "https://freesound.org/apiv2/"
|
||||||
|
search_url = (
|
||||||
|
url + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# search request
|
||||||
|
def request(query, params):
|
||||||
|
params["url"] = search_url.format(
|
||||||
|
query=urlencode({"q": query}),
|
||||||
|
page=params["pageno"],
|
||||||
|
api_key=api_key,
|
||||||
|
)
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search request
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
search_res = loads(resp.text)
|
||||||
|
# parse results
|
||||||
|
for result in search_res.get("results", []):
|
||||||
|
title = result["name"]
|
||||||
|
content = result["description"][:128]
|
||||||
|
publishedDate = datetime.fromisoformat(result["created"])
|
||||||
|
uri = result["download"]
|
||||||
|
|
||||||
|
# append result
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"url": result["url"],
|
||||||
|
"title": title,
|
||||||
|
"publishedDate": publishedDate,
|
||||||
|
"audio_src": uri,
|
||||||
|
"content": content,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+51
@@ -0,0 +1,51 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Frinkiac (Images)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from json import loads
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://frinkiac.com',
|
||||||
|
"wikidata_id": 'Q24882614',
|
||||||
|
"official_api_documentation": {'url': None, 'comment': 'see https://github.com/MitchellAW/CompuGlobal'},
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = ['images']
|
||||||
|
|
||||||
|
BASE = 'https://frinkiac.com/'
|
||||||
|
SEARCH_URL = '{base}api/search?{query}'
|
||||||
|
RESULT_URL = '{base}?{query}'
|
||||||
|
THUMB_URL = '{base}img/{episode}/{timestamp}/medium.jpg'
|
||||||
|
IMAGE_URL = '{base}img/{episode}/{timestamp}.jpg'
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = SEARCH_URL.format(base=BASE, query=urlencode({'q': query}))
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
response_data = loads(resp.text)
|
||||||
|
for result in response_data:
|
||||||
|
episode = result['Episode']
|
||||||
|
timestamp = result['Timestamp']
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'template': 'images.html',
|
||||||
|
'url': RESULT_URL.format(base=BASE, query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})),
|
||||||
|
'title': episode,
|
||||||
|
'content': '',
|
||||||
|
'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp),
|
||||||
|
'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+103
@@ -0,0 +1,103 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
# pylint: disable=invalid-name
|
||||||
|
"""Genius
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://genius.com/',
|
||||||
|
"wikidata_id": 'Q3419343',
|
||||||
|
"official_api_documentation": 'https://docs.genius.com/',
|
||||||
|
"use_official_api": True,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['music', 'lyrics']
|
||||||
|
paging = True
|
||||||
|
page_size = 5
|
||||||
|
|
||||||
|
url = 'https://genius.com/api/'
|
||||||
|
search_url = url + 'search/{index}?{query}&page={pageno}&per_page={page_size}'
|
||||||
|
music_player = 'https://genius.com{api_path}/apple_music_player'
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
params['url'] = search_url.format(
|
||||||
|
query=urlencode({'q': query}),
|
||||||
|
index='multi',
|
||||||
|
page_size=page_size,
|
||||||
|
pageno=params['pageno'],
|
||||||
|
)
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def parse_lyric(hit):
|
||||||
|
content = ''
|
||||||
|
highlights = hit['highlights']
|
||||||
|
if highlights:
|
||||||
|
content = hit['highlights'][0]['value']
|
||||||
|
else:
|
||||||
|
content = hit['result'].get('title_with_featured', '')
|
||||||
|
|
||||||
|
timestamp = hit['result']['lyrics_updated_at']
|
||||||
|
result = {
|
||||||
|
'url': hit['result']['url'],
|
||||||
|
'title': hit['result']['full_title'],
|
||||||
|
'content': content,
|
||||||
|
'img_src': hit['result']['song_art_image_thumbnail_url'],
|
||||||
|
}
|
||||||
|
if timestamp:
|
||||||
|
result.update({'publishedDate': datetime.fromtimestamp(timestamp)})
|
||||||
|
api_path = hit['result'].get('api_path')
|
||||||
|
if api_path:
|
||||||
|
# The players only play a 30 sec sample of the title. Some of the players
# will be blocked because of a cross-origin request and some players will
# link to Apple when you press the play button.
|
||||||
|
result['iframe_src'] = music_player.format(api_path=api_path)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def parse_artist(hit):
|
||||||
|
result = {
|
||||||
|
'url': hit['result']['url'],
|
||||||
|
'title': hit['result']['name'],
|
||||||
|
'content': '',
|
||||||
|
'img_src': hit['result']['image_url'],
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def parse_album(hit):
|
||||||
|
res = hit['result']
|
||||||
|
content = res.get('name_with_artist', res.get('name', ''))
|
||||||
|
x = res.get('release_date_components')
|
||||||
|
if x:
|
||||||
|
x = x.get('year')
|
||||||
|
if x:
|
||||||
|
content = "%s / %s" % (x, content)
|
||||||
|
return {
|
||||||
|
'url': res['url'],
|
||||||
|
'title': res['full_title'],
|
||||||
|
'img_src': res['cover_art_url'],
|
||||||
|
'content': content.strip(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album}
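# dispatch table: maps the 'type' field of an API hit to its parser; hit types
# without an entry are silently skipped in response()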
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
results = []
|
||||||
|
for section in resp.json()['response']['sections']:
|
||||||
|
for hit in section['hits']:
|
||||||
|
func = parse.get(hit['type'])
|
||||||
|
if func:
|
||||||
|
results.append(func(hit))
|
||||||
|
return results
|
||||||
Executable
+124
@@ -0,0 +1,124 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""
|
||||||
|
Gentoo Wiki
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode, urljoin
|
||||||
|
from lxml import html
|
||||||
|
from searx.utils import extract_text
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://wiki.gentoo.org/',
|
||||||
|
"wikidata_id": 'Q1050637',
|
||||||
|
"official_api_documentation": 'https://wiki.gentoo.org/api.php',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['it', 'software wikis']
|
||||||
|
paging = True
|
||||||
|
base_url = 'https://wiki.gentoo.org'
|
||||||
|
|
||||||
|
# xpath queries
|
||||||
|
xpath_results = '//ul[@class="mw-search-results"]/li'
|
||||||
|
xpath_link = './/div[@class="mw-search-result-heading"]/a'
|
||||||
|
xpath_content = './/div[@class="searchresult"]'
|
||||||
|
|
||||||
|
|
||||||
|
# cut 'en' from 'en-US', 'de' from 'de-CH', and so on
|
||||||
|
def locale_to_lang_code(locale):
|
||||||
|
if locale.find('-') >= 0:
|
||||||
|
locale = locale.split('-')[0]
|
||||||
|
return locale
|
||||||
|
|
||||||
|
|
||||||
|
# wikis for some languages were moved off from the main site, we need to make
|
||||||
|
# requests to correct URLs to be able to get results in those languages
|
||||||
|
lang_urls = {
|
||||||
|
'en': {'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}'},
|
||||||
|
'others': {
|
||||||
|
'base': 'https://wiki.gentoo.org',
|
||||||
|
'search': '/index.php?title=Special:Search&offset={offset}&{query}\
|
||||||
|
&profile=translation&languagefilter={language}',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# get base & search URLs for selected language
|
||||||
|
def get_lang_urls(language):
|
||||||
|
if language != 'en':
|
||||||
|
return lang_urls['others']
|
||||||
|
return lang_urls['en']
|
||||||
|
|
||||||
|
|
||||||
|
# Language names to build search requests for
|
||||||
|
# those languages which are hosted on the main site.
|
||||||
|
main_langs = {
|
||||||
|
'ar': 'العربية',
|
||||||
|
'bg': 'Български',
|
||||||
|
'cs': 'Česky',
|
||||||
|
'da': 'Dansk',
|
||||||
|
'el': 'Ελληνικά',
|
||||||
|
'es': 'Español',
|
||||||
|
'he': 'עברית',
|
||||||
|
'hr': 'Hrvatski',
|
||||||
|
'hu': 'Magyar',
|
||||||
|
'it': 'Italiano',
|
||||||
|
'ko': '한국어',
|
||||||
|
'lt': 'Lietuviškai',
|
||||||
|
'nl': 'Nederlands',
|
||||||
|
'pl': 'Polski',
|
||||||
|
'pt': 'Português',
|
||||||
|
'ru': 'Русский',
|
||||||
|
'sl': 'Slovenský',
|
||||||
|
'th': 'ไทย',
|
||||||
|
'uk': 'Українська',
|
||||||
|
'zh': '简体中文',
|
||||||
|
}
|
||||||
|
|
||||||
|
# do search-request
|
||||||
|
def request(query, params):
|
||||||
|
# translate the locale (e.g. 'en-US') to language code ('en')
|
||||||
|
language = locale_to_lang_code(params['language'])
|
||||||
|
|
||||||
|
# if our language is hosted on the main site, we need to add its name
|
||||||
|
# to the query in order to narrow the results to that language
|
||||||
|
if language in main_langs:
|
||||||
|
query += ' (' + main_langs[language] + ')'
|
||||||
|
|
||||||
|
# prepare the request parameters
|
||||||
|
query = urlencode({'search': query})
|
||||||
|
offset = (params['pageno'] - 1) * 20
|
||||||
|
|
||||||
|
# get request URLs for our language of choice
|
||||||
|
urls = get_lang_urls(language)
|
||||||
|
search_url = urls['base'] + urls['search']
|
||||||
|
|
||||||
|
params['url'] = search_url.format(query=query, offset=offset, language=language)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# get response from search-request
|
||||||
|
def response(resp):
|
||||||
|
# get the base URL for the language in which request was made
|
||||||
|
language = locale_to_lang_code(resp.search_params['language'])
|
||||||
|
base_url = get_lang_urls(language)['base']
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for result in dom.xpath(xpath_results):
|
||||||
|
link = result.xpath(xpath_link)[0]
|
||||||
|
href = urljoin(base_url, link.attrib.get('href'))
|
||||||
|
title = extract_text(link)
|
||||||
|
content = extract_text(result.xpath(xpath_content))
|
||||||
|
|
||||||
|
results.append({'url': href, 'title': title, 'content': content})
|
||||||
|
|
||||||
|
return results
|
||||||
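# --- illustration (not part of the engine) -------------------------------
# A short sketch of how request() composes the URL from the pieces above,
# assuming a 'de-CH' locale and page 2; the query term is an arbitrary example.
from urllib.parse import urlencode as _urlencode

_language = locale_to_lang_code('de-CH')   # -> 'de'
_urls = get_lang_urls(_language)           # -> lang_urls['others']
_offset = (2 - 1) * 20                     # pageno 2 -> offset 20
_url = (_urls['base'] + _urls['search']).format(
    query=_urlencode({'search': 'installcd'}), offset=_offset, language=_language
)
print(_url)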
Executable
+61
@@ -0,0 +1,61 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Github (IT)
"""

from json import loads
from urllib.parse import urlencode

# about
about = {
    "website": 'https://github.com/',
    "wikidata_id": 'Q364',
    "official_api_documentation": 'https://developer.github.com/v3/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ['it', 'repos']

# search-url
search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'  # noqa

accept_header = 'application/vnd.github.preview.text-match+json'


# do search-request
def request(query, params):
    params['url'] = search_url.format(query=urlencode({'q': query}))

    params['headers']['Accept'] = accept_header

    return params


# get response from search-request
def response(resp):
    results = []

    search_res = loads(resp.text)

    # check if items are received
    if 'items' not in search_res:
        return []

    # parse results
    for res in search_res['items']:
        title = res['name']
        url = res['html_url']

        if res['description']:
            content = res['description'][:500]
        else:
            content = ''

        # append result
        results.append({'url': url, 'title': title, 'content': content})

    # return results
    return results
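# --- illustration (not part of the engine) -------------------------------
# A rough sketch of the request()/response() round trip; the params skeleton
# and the fake response object below are assumptions for illustration, not
# the real SearXNG plumbing.
class _FakeResp:  # minimal stand-in exposing only the .text attribute used above
    text = (
        '{"items": [{"name": "searxng", "html_url": "https://github.com/searxng/searxng",'
        ' "description": "a privacy-respecting metasearch engine"}]}'
    )

_params = request('searxng', {'headers': {}})
print(_params['url'])           # https://api.github.com/search/repositories?...&q=searxng
print(response(_FakeResp()))    # [{'url': ..., 'title': 'searxng', 'content': ...}]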
Executable
+493
@@ -0,0 +1,493 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This is the implementation of the Google WEB engine. Some of this
|
||||||
|
implementations (manly the :py:obj:`get_google_info`) are shared by other
|
||||||
|
engines:
|
||||||
|
|
||||||
|
- :ref:`google images engine`
|
||||||
|
- :ref:`google news engine`
|
||||||
|
- :ref:`google videos engine`
|
||||||
|
- :ref:`google scholar engine`
|
||||||
|
- :ref:`google autocomplete`
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import re
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
import babel
|
||||||
|
import babel.core
|
||||||
|
import babel.languages
|
||||||
|
|
||||||
|
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
|
||||||
|
from searx.locales import language_tag, region_tag, get_offical_locales
|
||||||
|
from searx.network import get # see https://github.com/searxng/searxng/issues/762
|
||||||
|
from searx.exceptions import SearxEngineCaptchaException
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.google.com',
|
||||||
|
"wikidata_id": 'Q9366',
|
||||||
|
"official_api_documentation": 'https://developers.google.com/custom-search/',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['general', 'web']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = True
|
||||||
|
|
||||||
|
time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
|
||||||
|
|
||||||
|
# Filter results. 0: None, 1: Moderate, 2: Strict
|
||||||
|
filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
|
||||||
|
|
||||||
|
# specific xpath variables
|
||||||
|
# ------------------------
|
||||||
|
|
||||||
|
results_xpath = './/div[contains(@jscontroller, "SC7lYd")]'
|
||||||
|
title_xpath = './/a/h3[1]'
|
||||||
|
href_xpath = './/a[h3]/@href'
|
||||||
|
content_xpath = './/div[@data-sncf]'
|
||||||
|
|
||||||
|
# Suggestions are links placed in a *card-section*; we extract only the text
# from the links, not the links themselves.
|
||||||
|
suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a'
|
||||||
|
|
||||||
|
# UI_ASYNC = 'use_ac:true,_fmt:html' # returns a HTTP 500 when user search for
|
||||||
|
# # celebrities like '!google natasha allegri'
|
||||||
|
# # or '!google chris evans'
|
||||||
|
UI_ASYNC = 'use_ac:true,_fmt:prog'
|
||||||
|
"""Format of the response from UI's async request."""
|
||||||
|
|
||||||
|
|
||||||
|
def get_google_info(params, eng_traits):
|
||||||
|
"""Composing various (language) properties for the google engines (:ref:`google
|
||||||
|
API`).
|
||||||
|
|
||||||
|
This function is called by the various google engines (:ref:`google web
|
||||||
|
engine`, :ref:`google images engine`, :ref:`google news engine` and
|
||||||
|
:ref:`google videos engine`).
|
||||||
|
|
||||||
|
:param dict param: Request parameters of the engine. At least
|
||||||
|
a ``searxng_locale`` key should be in the dictionary.
|
||||||
|
|
||||||
|
:param eng_traits: Engine's traits fetched from google preferences
|
||||||
|
(:py:obj:`searx.enginelib.traits.EngineTraits`)
|
||||||
|
|
||||||
|
:rtype: dict
|
||||||
|
:returns:
|
||||||
|
Py-Dictionary with the key/value pairs:
|
||||||
|
|
||||||
|
language:
|
||||||
|
The language code that is used by google (e.g. ``lang_en`` or
|
||||||
|
``lang_zh-TW``)
|
||||||
|
|
||||||
|
country:
|
||||||
|
The country code that is used by google (e.g. ``US`` or ``TW``)
|
||||||
|
|
||||||
|
locale:
|
||||||
|
A instance of :py:obj:`babel.core.Locale` build from the
|
||||||
|
``searxng_locale`` value.
|
||||||
|
|
||||||
|
subdomain:
|
||||||
|
Google subdomain :py:obj:`google_domains` that fits to the country
|
||||||
|
code.
|
||||||
|
|
||||||
|
params:
|
||||||
|
Py-Dictionary with additional request arguments (can be passed to
|
||||||
|
:py:func:`urllib.parse.urlencode`).
|
||||||
|
|
||||||
|
- ``hl`` parameter: specifies the interface language of user interface.
|
||||||
|
- ``lr`` parameter: restricts search results to documents written in
|
||||||
|
a particular language.
|
||||||
|
- ``cr`` parameter: restricts search results to documents
|
||||||
|
originating in a particular country.
|
||||||
|
- ``ie`` parameter: sets the character encoding scheme that should
|
||||||
|
be used to interpret the query string ('utf8').
|
||||||
|
- ``oe`` parameter: sets the character encoding scheme that should
|
||||||
|
be used to decode the XML result ('utf8').
|
||||||
|
|
||||||
|
headers:
|
||||||
|
Py-Dictionary with additional HTTP headers (can be passed to
|
||||||
|
request's headers)
|
||||||
|
|
||||||
|
- ``Accept: '*/*``
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
ret_val = {
|
||||||
|
'language': None,
|
||||||
|
'country': None,
|
||||||
|
'subdomain': None,
|
||||||
|
'params': {},
|
||||||
|
'headers': {},
|
||||||
|
'cookies': {},
|
||||||
|
'locale': None,
|
||||||
|
}
|
||||||
|
|
||||||
|
sxng_locale = params.get('searxng_locale', 'all')
|
||||||
|
try:
|
||||||
|
locale = babel.Locale.parse(sxng_locale, sep='-')
|
||||||
|
except babel.core.UnknownLocaleError:
|
||||||
|
locale = None
|
||||||
|
|
||||||
|
eng_lang = eng_traits.get_language(sxng_locale, 'lang_en')
|
||||||
|
lang_code = eng_lang.split('_')[-1] # lang_zh-TW --> zh-TW / lang_en --> en
|
||||||
|
country = eng_traits.get_region(sxng_locale, eng_traits.all_locale)
|
||||||
|
|
||||||
|
    # Test zh_hans & zh_hant --> in the topmost links of the result lists of
    # TW and HK you should find a wiktionary.org zh_hant link.  In the result
    # list of zh-CN there should be no hant link; instead you should find
    # zh.m.wikipedia.org/zh somewhere in the top.
|
||||||
|
|
||||||
|
# '!go 日 :zh-TW' --> https://zh.m.wiktionary.org/zh-hant/%E6%97%A5
|
||||||
|
# '!go 日 :zh-CN' --> https://zh.m.wikipedia.org/zh/%E6%97%A5
|
||||||
|
|
||||||
|
ret_val['language'] = eng_lang
|
||||||
|
ret_val['country'] = country
|
||||||
|
ret_val['locale'] = locale
|
||||||
|
ret_val['subdomain'] = eng_traits.custom['supported_domains'].get(country.upper(), 'www.google.com')
|
||||||
|
|
||||||
|
# hl parameter:
|
||||||
|
# The hl parameter specifies the interface language (host language) of
|
||||||
|
# your user interface. To improve the performance and the quality of your
|
||||||
|
# search results, you are strongly encouraged to set this parameter
|
||||||
|
# explicitly.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#hlsp
|
||||||
|
# The Interface Language:
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
|
||||||
|
|
||||||
|
# https://github.com/searxng/searxng/issues/2515#issuecomment-1607150817
|
||||||
|
ret_val['params']['hl'] = f'{lang_code}-{country}'
|
||||||
|
|
||||||
|
# lr parameter:
|
||||||
|
# The lr (language restrict) parameter restricts search results to
|
||||||
|
# documents written in a particular language.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#lrsp
|
||||||
|
# Language Collection Values:
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
|
||||||
|
#
|
||||||
|
# To select 'all' languages an empty 'lr' value is used.
|
||||||
|
#
|
||||||
|
    # In contrast to other Google services, Google Scholar supports selecting
    # more than one language.  The languages are separated by a pipe '|'
    # (logical OR).  For example: &lr=lang_zh-TW%7Clang_de selects articles
    # written in Traditional Chinese OR German.
|
||||||
|
|
||||||
|
ret_val['params']['lr'] = eng_lang
|
||||||
|
if sxng_locale == 'all':
|
||||||
|
ret_val['params']['lr'] = ''
|
||||||
|
|
||||||
|
# cr parameter:
|
||||||
|
# The cr parameter restricts search results to documents originating in a
|
||||||
|
# particular country.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#crsp
|
||||||
|
|
||||||
|
ret_val['params']['cr'] = 'country' + country
|
||||||
|
if sxng_locale == 'all':
|
||||||
|
ret_val['params']['cr'] = ''
|
||||||
|
|
||||||
|
    # gl parameter: (mandatory for Google News)
|
||||||
|
# The gl parameter value is a two-letter country code. For WebSearch
|
||||||
|
# results, the gl parameter boosts search results whose country of origin
|
||||||
|
# matches the parameter value. See the Country Codes section for a list of
|
||||||
|
# valid values.
|
||||||
|
# Specifying a gl parameter value in WebSearch requests should improve the
|
||||||
|
# relevance of results. This is particularly true for international
|
||||||
|
# customers and, even more specifically, for customers in English-speaking
|
||||||
|
# countries other than the United States.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#glsp
|
||||||
|
|
||||||
|
# https://github.com/searxng/searxng/issues/2515#issuecomment-1606294635
|
||||||
|
# ret_val['params']['gl'] = country
|
||||||
|
|
||||||
|
# ie parameter:
|
||||||
|
# The ie parameter sets the character encoding scheme that should be used
|
||||||
|
# to interpret the query string. The default ie value is latin1.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#iesp
|
||||||
|
|
||||||
|
ret_val['params']['ie'] = 'utf8'
|
||||||
|
|
||||||
|
# oe parameter:
|
||||||
|
# The oe parameter sets the character encoding scheme that should be used
|
||||||
|
# to decode the XML result. The default oe value is latin1.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#oesp
|
||||||
|
|
||||||
|
ret_val['params']['oe'] = 'utf8'
|
||||||
|
|
||||||
|
# num parameter:
|
||||||
|
# The num parameter identifies the number of search results to return.
|
||||||
|
# The default num value is 10, and the maximum value is 20. If you request
|
||||||
|
# more than 20 results, only 20 results will be returned.
|
||||||
|
# https://developers.google.com/custom-search/docs/xml_results#numsp
|
||||||
|
|
||||||
|
# HINT: seems to have no effect (tested in google WEB & Images)
|
||||||
|
# ret_val['params']['num'] = 20
|
||||||
|
|
||||||
|
# HTTP headers
|
||||||
|
|
||||||
|
ret_val['headers']['Accept'] = '*/*'
|
||||||
|
|
||||||
|
# Cookies
|
||||||
|
|
||||||
|
# - https://github.com/searxng/searxng/pull/1679#issuecomment-1235432746
|
||||||
|
# - https://github.com/searxng/searxng/issues/1555
|
||||||
|
ret_val['cookies']['CONSENT'] = "YES+"
|
||||||
|
|
||||||
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
|
def detect_google_sorry(resp):
|
||||||
|
if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'):
|
||||||
|
raise SearxEngineCaptchaException()
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Google search request"""
|
||||||
|
# pylint: disable=line-too-long
|
||||||
|
offset = (params['pageno'] - 1) * 10
|
||||||
|
google_info = get_google_info(params, traits)
|
||||||
|
|
||||||
|
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
||||||
|
query_url = (
|
||||||
|
'https://'
|
||||||
|
+ google_info['subdomain']
|
||||||
|
+ '/search'
|
||||||
|
+ "?"
|
||||||
|
+ urlencode(
|
||||||
|
{
|
||||||
|
'q': query,
|
||||||
|
**google_info['params'],
|
||||||
|
'filter': '0',
|
||||||
|
'start': offset,
|
||||||
|
# 'vet': '12ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0QxK8CegQIARAC..i',
|
||||||
|
# 'ved': '2ahUKEwik3ZbIzfn7AhXMX_EDHbUDBh0Q_skCegQIARAG',
|
||||||
|
# 'cs' : 1,
|
||||||
|
# 'sa': 'N',
|
||||||
|
# 'yv': 3,
|
||||||
|
# 'prmd': 'vin',
|
||||||
|
# 'ei': 'GASaY6TxOcy_xc8PtYeY6AE',
|
||||||
|
# 'sa': 'N',
|
||||||
|
# 'sstk': 'AcOHfVkD7sWCSAheZi-0tx_09XDO55gTWY0JNq3_V26cNN-c8lfD45aZYPI8s_Bqp8s57AHz5pxchDtAGCA_cikAWSjy9kw3kgg'
|
||||||
|
                # formerly known as use_mobile_ui
|
||||||
|
'asearch': 'arc',
|
||||||
|
'async': UI_ASYNC,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if params['time_range'] in time_range_dict:
|
||||||
|
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
||||||
|
if params['safesearch']:
|
||||||
|
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
|
||||||
|
params['url'] = query_url
|
||||||
|
|
||||||
|
params['cookies'] = google_info['cookies']
|
||||||
|
params['headers'].update(google_info['headers'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
# =26;[3,"dimg_ZNMiZPCqE4apxc8P3a2tuAQ_137"]a87;data:image/jpeg;base64,/9j/4AAQSkZJRgABA
|
||||||
|
# ...6T+9Nl4cnD+gr9OK8I56/tX3l86nWYw//2Q==26;
|
||||||
|
RE_DATA_IMAGE = re.compile(r'"(dimg_[^"]*)"[^;]*;(data:image[^;]*;[^;]*);')
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_data_images(dom):
|
||||||
|
data_image_map = {}
|
||||||
|
for img_id, data_image in RE_DATA_IMAGE.findall(dom.text_content()):
|
||||||
|
end_pos = data_image.rfind('=')
|
||||||
|
if end_pos > 0:
|
||||||
|
data_image = data_image[: end_pos + 1]
|
||||||
|
data_image_map[img_id] = data_image
|
||||||
|
logger.debug('data:image objects --> %s', list(data_image_map.keys()))
|
||||||
|
return data_image_map
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from google's search request"""
|
||||||
|
# pylint: disable=too-many-branches, too-many-statements
|
||||||
|
detect_google_sorry(resp)
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
|
# convert the text to dom
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
data_image_map = _parse_data_images(dom)
|
||||||
|
|
||||||
|
# results --> answer
|
||||||
|
answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
|
||||||
|
if answer_list:
|
||||||
|
answer_list = [_.xpath("normalize-space()") for _ in answer_list]
|
||||||
|
results.append({'answer': ' '.join(answer_list)})
|
||||||
|
else:
|
||||||
|
logger.debug("did not find 'answer'")
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
|
||||||
|
for result in eval_xpath_list(dom, results_xpath): # pylint: disable=too-many-nested-blocks
|
||||||
|
|
||||||
|
try:
|
||||||
|
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
|
||||||
|
if title_tag is None:
|
||||||
|
                # this is not one of the common google result *sections*
|
||||||
|
logger.debug('ignoring item from the result_xpath list: missing title')
|
||||||
|
continue
|
||||||
|
title = extract_text(title_tag)
|
||||||
|
|
||||||
|
url = eval_xpath_getindex(result, href_xpath, 0, None)
|
||||||
|
if url is None:
|
||||||
|
logger.debug('ignoring item from the result_xpath list: missing url of title "%s"', title)
|
||||||
|
continue
|
||||||
|
|
||||||
|
content_nodes = eval_xpath(result, content_xpath)
|
||||||
|
content = extract_text(content_nodes)
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
logger.debug('ignoring item from the result_xpath list: missing content of title "%s"', title)
|
||||||
|
continue
|
||||||
|
|
||||||
|
img_src = content_nodes[0].xpath('.//img/@src')
|
||||||
|
if img_src:
|
||||||
|
img_src = img_src[0]
|
||||||
|
if img_src.startswith('data:image'):
|
||||||
|
img_id = content_nodes[0].xpath('.//img/@id')
|
||||||
|
if img_id:
|
||||||
|
img_src = data_image_map.get(img_id[0])
|
||||||
|
else:
|
||||||
|
img_src = None
|
||||||
|
|
||||||
|
results.append({'url': url, 'title': title, 'content': content, 'img_src': img_src})
|
||||||
|
|
||||||
|
except Exception as e: # pylint: disable=broad-except
|
||||||
|
logger.error(e, exc_info=True)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# parse suggestion
|
||||||
|
for suggestion in eval_xpath_list(dom, suggestion_xpath):
|
||||||
|
# append suggestion
|
||||||
|
results.append({'suggestion': extract_text(suggestion)})
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
# get supported languages from their site
|
||||||
|
|
||||||
|
|
||||||
|
skip_countries = [
|
||||||
|
    # official language of google-country not in google-languages
    'AL',  # Albania (sq)
    'AZ',  # Azerbaijan (az)
    'BD',  # Bangladesh (bn)
    'BN',  # Brunei Darussalam (ms)
    'BT',  # Bhutan (dz)
    'ET',  # Ethiopia (am)
    'GE',  # Georgia (ka, os)
    'GL',  # Greenland (kl)
    'KH',  # Cambodia (km)
    'LA',  # Laos (lo)
    'LK',  # Sri Lanka (si, ta)
    'ME',  # Montenegro (sr)
    'MK',  # North Macedonia (mk, sq)
    'MM',  # Myanmar (my)
    'MN',  # Mongolia (mn)
    'MV',  # Maldives (dv) // dv_MV is unknown by babel
    'MY',  # Malaysia (ms)
    'NP',  # Nepal (ne)
    'TJ',  # Tajikistan (tg)
    'TM',  # Turkmenistan (tk)
    'UZ',  # Uzbekistan (uz)
]
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits, add_domains: bool = True):
|
||||||
|
"""Fetch languages from Google."""
|
||||||
|
# pylint: disable=import-outside-toplevel, too-many-branches
|
||||||
|
|
||||||
|
engine_traits.custom['supported_domains'] = {}
|
||||||
|
|
||||||
|
resp = get('https://www.google.com/preferences')
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
raise RuntimeError("Response from Google's preferences is not OK.")
|
||||||
|
|
||||||
|
dom = html.fromstring(resp.text) # type: ignore
|
||||||
|
|
||||||
|
# supported language codes
|
||||||
|
|
||||||
|
lang_map = {'no': 'nb'}
|
||||||
|
for x in eval_xpath_list(dom, '//*[@id="langSec"]//input[@name="lr"]'):
|
||||||
|
|
||||||
|
eng_lang = x.get("value").split('_')[-1]
|
||||||
|
try:
|
||||||
|
locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-')
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: %s -> %s is unknown by babel" % (x.get("data-name"), eng_lang))
|
||||||
|
continue
|
||||||
|
sxng_lang = language_tag(locale)
|
||||||
|
|
||||||
|
conflict = engine_traits.languages.get(sxng_lang)
|
||||||
|
if conflict:
|
||||||
|
if conflict != eng_lang:
|
||||||
|
print("CONFLICT: babel %s --> %s, %s" % (sxng_lang, conflict, eng_lang))
|
||||||
|
continue
|
||||||
|
engine_traits.languages[sxng_lang] = 'lang_' + eng_lang
|
||||||
|
|
||||||
|
# alias languages
|
||||||
|
engine_traits.languages['zh'] = 'lang_zh-CN'
|
||||||
|
|
||||||
|
# supported region codes
|
||||||
|
|
||||||
|
for x in eval_xpath_list(dom, '//*[@name="region"]/..//input[@name="region"]'):
|
||||||
|
eng_country = x.get("value")
|
||||||
|
|
||||||
|
if eng_country in skip_countries:
|
||||||
|
continue
|
||||||
|
if eng_country == 'ZZ':
|
||||||
|
engine_traits.all_locale = 'ZZ'
|
||||||
|
continue
|
||||||
|
|
||||||
|
sxng_locales = get_offical_locales(eng_country, engine_traits.languages.keys(), regional=True)
|
||||||
|
|
||||||
|
if not sxng_locales:
|
||||||
|
print("ERROR: can't map from google country %s (%s) to a babel region." % (x.get('data-name'), eng_country))
|
||||||
|
continue
|
||||||
|
|
||||||
|
for sxng_locale in sxng_locales:
|
||||||
|
engine_traits.regions[region_tag(sxng_locale)] = eng_country
|
||||||
|
|
||||||
|
# alias regions
|
||||||
|
engine_traits.regions['zh-CN'] = 'HK'
|
||||||
|
|
||||||
|
# supported domains
|
||||||
|
|
||||||
|
if add_domains:
|
||||||
|
resp = get('https://www.google.com/supported_domains')
|
||||||
|
if not resp.ok: # type: ignore
|
||||||
|
raise RuntimeError("Response from https://www.google.com/supported_domains is not OK.")
|
||||||
|
|
||||||
|
for domain in resp.text.split(): # type: ignore
|
||||||
|
domain = domain.strip()
|
||||||
|
if not domain or domain in [
|
||||||
|
'.google.com',
|
||||||
|
]:
|
||||||
|
continue
|
||||||
|
region = domain.split('.')[-1].upper()
|
||||||
|
engine_traits.custom['supported_domains'][region] = 'www' + domain # type: ignore
|
||||||
|
if region == 'HK':
|
||||||
|
# There is no google.cn, we use .com.hk for zh-CN
|
||||||
|
engine_traits.custom['supported_domains']['CN'] = 'www' + domain # type: ignore
|
||||||
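# --- illustration (not part of the engine) -------------------------------
# A sketch of the parameter plumbing above: the dict below shows the rough
# shape of what get_google_info() returns for a 'de-DE' request (all values
# are illustrative assumptions), and how request() folds it into the URL.
_example_info = {
    'subdomain': 'www.google.de',
    'params': {'hl': 'de-DE', 'lr': 'lang_de', 'cr': 'countryDE', 'ie': 'utf8', 'oe': 'utf8'},
    'headers': {'Accept': '*/*'},
    'cookies': {'CONSENT': 'YES+'},
}
print(
    'https://' + _example_info['subdomain'] + '/search?'
    + urlencode({'q': 'corona', **_example_info['params'], 'start': 0})
)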
Executable
+129
@@ -0,0 +1,129 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This is the implementation of the Google Images engine using the internal
|
||||||
|
Google API used by the Google Go Android app.
|
||||||
|
|
||||||
|
This internal API offers results in
|
||||||
|
|
||||||
|
- JSON (``_fmt:json``)
|
||||||
|
- Protobuf_ (``_fmt:pb``)
|
||||||
|
- Protobuf_ compressed? (``_fmt:pc``)
|
||||||
|
- HTML (``_fmt:html``)
|
||||||
|
- Protobuf_ encoded in JSON (``_fmt:jspb``).
|
||||||
|
|
||||||
|
.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from json import loads
|
||||||
|
|
||||||
|
from searx.engines.google import fetch_traits # pylint: disable=unused-import
|
||||||
|
from searx.engines.google import (
|
||||||
|
get_google_info,
|
||||||
|
time_range_dict,
|
||||||
|
detect_google_sorry,
|
||||||
|
)
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://images.google.com',
|
||||||
|
"wikidata_id": 'Q521550',
|
||||||
|
"official_api_documentation": 'https://developers.google.com/custom-search',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'JSON',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['images', 'web']
|
||||||
|
paging = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = True
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Google-Image search request"""
|
||||||
|
|
||||||
|
google_info = get_google_info(params, traits)
|
||||||
|
|
||||||
|
query_url = (
|
||||||
|
'https://'
|
||||||
|
+ google_info['subdomain']
|
||||||
|
+ '/search'
|
||||||
|
+ "?"
|
||||||
|
+ urlencode(
|
||||||
|
{
|
||||||
|
'q': query,
|
||||||
|
'tbm': "isch",
|
||||||
|
**google_info['params'],
|
||||||
|
'asearch': 'isch',
|
||||||
|
'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if params['time_range'] in time_range_dict:
|
||||||
|
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
||||||
|
if params['safesearch']:
|
||||||
|
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
|
||||||
|
params['url'] = query_url
|
||||||
|
|
||||||
|
params['cookies'] = google_info['cookies']
|
||||||
|
params['headers'].update(google_info['headers'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from google's search request"""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
detect_google_sorry(resp)
|
||||||
|
|
||||||
|
json_start = resp.text.find('{"ischj":')
|
||||||
|
json_data = loads(resp.text[json_start:])
|
||||||
|
|
||||||
|
for item in json_data["ischj"]["metadata"]:
|
||||||
|
|
||||||
|
result_item = {
|
||||||
|
'url': item["result"]["referrer_url"],
|
||||||
|
'title': item["result"]["page_title"],
|
||||||
|
'content': item["text_in_grid"]["snippet"],
|
||||||
|
'source': item["result"]["site_title"],
|
||||||
|
'img_format': f'{item["original_image"]["width"]} x {item["original_image"]["height"]}',
|
||||||
|
'img_src': item["original_image"]["url"],
|
||||||
|
'thumbnail_src': item["thumbnail"]["url"],
|
||||||
|
'template': 'images.html',
|
||||||
|
}
|
||||||
|
|
||||||
|
author = item["result"].get('iptc', {}).get('creator')
|
||||||
|
if author:
|
||||||
|
result_item['author'] = ', '.join(author)
|
||||||
|
|
||||||
|
copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
|
||||||
|
if copyright_notice:
|
||||||
|
result_item['source'] += ' | ' + copyright_notice
|
||||||
|
|
||||||
|
freshness_date = item["result"].get("freshness_date")
|
||||||
|
if freshness_date:
|
||||||
|
result_item['source'] += ' | ' + freshness_date
|
||||||
|
|
||||||
|
file_size = item.get('gsa', {}).get('file_size')
|
||||||
|
if file_size:
|
||||||
|
result_item['source'] += ' (%s)' % file_size
|
||||||
|
|
||||||
|
results.append(result_item)
|
||||||
|
|
||||||
|
return results
|
||||||
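# --- illustration (not part of the engine) -------------------------------
# A sketch of the JSON-extraction step used in response() above; the payload
# below is fabricated sample text with the same '{"ischj": ...}' nesting the
# parser expects, not a captured Google reply.
_sample_text = (
    'garbage-prefix{"ischj": {"metadata": [{'
    '"result": {"referrer_url": "https://example.org/page", "page_title": "Example",'
    ' "site_title": "example.org"},'
    ' "text_in_grid": {"snippet": "an example snippet"},'
    ' "original_image": {"url": "https://example.org/a.jpg", "width": 800, "height": 600},'
    ' "thumbnail": {"url": "https://example.org/t.jpg"}}]}}'
)
_json_start = _sample_text.find('{"ischj":')
_json_data = loads(_sample_text[_json_start:])
print(_json_data["ischj"]["metadata"][0]["result"]["page_title"])  # Example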
Executable
+305
@@ -0,0 +1,305 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This is the implementation of the Google News engine.
|
||||||
|
|
||||||
|
Google News has a different region handling compared to Google WEB.
|
||||||
|
|
||||||
|
- the ``ceid`` argument has to be set (:py:obj:`ceid_list`)
|
||||||
|
- the hl_ argument has to be set correctly (and different to Google WEB)
|
||||||
|
- the gl_ argument is mandatory
|
||||||
|
|
||||||
|
If one of these arguments is not set correctly, the request is redirected to
the CONSENT dialog::
|
||||||
|
|
||||||
|
https://consent.google.com/m?continue=
|
||||||
|
|
||||||
|
The google news API ignores some parameters from the common :ref:`google API`:
|
||||||
|
|
||||||
|
- num_ : the number of search results is ignored / there is no paging; all
  results for a query term are in the first response.
|
||||||
|
- save_ : is ignored / Google-News results are always *SafeSearch*
|
||||||
|
|
||||||
|
.. _hl: https://developers.google.com/custom-search/docs/xml_results#hlsp
|
||||||
|
.. _gl: https://developers.google.com/custom-search/docs/xml_results#glsp
|
||||||
|
.. _num: https://developers.google.com/custom-search/docs/xml_results#numsp
|
||||||
|
.. _save: https://developers.google.com/custom-search/docs/xml_results#safesp
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import base64
|
||||||
|
from lxml import html
|
||||||
|
import babel
|
||||||
|
|
||||||
|
from searx import locales
|
||||||
|
from searx.utils import (
|
||||||
|
eval_xpath,
|
||||||
|
eval_xpath_list,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
extract_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
from searx.engines.google import fetch_traits as _fetch_traits # pylint: disable=unused-import
|
||||||
|
from searx.engines.google import (
|
||||||
|
get_google_info,
|
||||||
|
detect_google_sorry,
|
||||||
|
)
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://news.google.com',
|
||||||
|
"wikidata_id": 'Q12020',
|
||||||
|
"official_api_documentation": 'https://developers.google.com/custom-search',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['news']
|
||||||
|
paging = False
|
||||||
|
time_range_support = False
|
||||||
|
|
||||||
|
# Google-News results are always *SafeSearch*. Option 'safesearch' is set to
|
||||||
|
# False here, otherwise checker will report safesearch-errors::
|
||||||
|
#
|
||||||
|
#  safesearch : results are identical for safesearch=0 and safesearch=2
|
||||||
|
safesearch = True
|
||||||
|
# send_accept_language_header = True
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Google-News search request"""
|
||||||
|
|
||||||
|
sxng_locale = params.get('searxng_locale', 'en-US')
|
||||||
|
ceid = locales.get_engine_locale(sxng_locale, traits.custom['ceid'], default='US:en')
|
||||||
|
google_info = get_google_info(params, traits)
|
||||||
|
google_info['subdomain'] = 'news.google.com' # google news has only one domain
|
||||||
|
|
||||||
|
ceid_region, ceid_lang = ceid.split(':')
|
||||||
|
ceid_lang, ceid_suffix = (
|
||||||
|
ceid_lang.split('-')
|
||||||
|
+ [
|
||||||
|
None,
|
||||||
|
]
|
||||||
|
)[:2]
|
||||||
|
|
||||||
|
google_info['params']['hl'] = ceid_lang
|
||||||
|
|
||||||
|
if ceid_suffix and ceid_suffix not in ['Hans', 'Hant']:
|
||||||
|
|
||||||
|
if ceid_region.lower() == ceid_lang:
|
||||||
|
google_info['params']['hl'] = ceid_lang + '-' + ceid_region
|
||||||
|
else:
|
||||||
|
google_info['params']['hl'] = ceid_lang + '-' + ceid_suffix
|
||||||
|
|
||||||
|
elif ceid_region.lower() != ceid_lang:
|
||||||
|
|
||||||
|
if ceid_region in ['AT', 'BE', 'CH', 'IL', 'SA', 'IN', 'BD', 'PT']:
|
||||||
|
google_info['params']['hl'] = ceid_lang
|
||||||
|
else:
|
||||||
|
google_info['params']['hl'] = ceid_lang + '-' + ceid_region
|
||||||
|
|
||||||
|
google_info['params']['lr'] = 'lang_' + ceid_lang.split('-')[0]
|
||||||
|
google_info['params']['gl'] = ceid_region
|
||||||
|
|
||||||
|
query_url = (
|
||||||
|
'https://'
|
||||||
|
+ google_info['subdomain']
|
||||||
|
+ "/search?"
|
||||||
|
+ urlencode(
|
||||||
|
{
|
||||||
|
'q': query,
|
||||||
|
**google_info['params'],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
# ceid includes a ':' character which must not be urlencoded
|
||||||
|
+ ('&ceid=%s' % ceid)
|
||||||
|
)
|
||||||
|
|
||||||
|
params['url'] = query_url
|
||||||
|
params['cookies'] = google_info['cookies']
|
||||||
|
params['headers'].update(google_info['headers'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from google's search request"""
|
||||||
|
results = []
|
||||||
|
detect_google_sorry(resp)
|
||||||
|
|
||||||
|
# convert the text to dom
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for result in eval_xpath_list(dom, '//div[@class="xrnccd"]'):
|
||||||
|
|
||||||
|
# The first <a> tag in the <article> contains the link to the article
|
||||||
|
# The href attribute of the <a> tag is a google internal link, we have
|
||||||
|
# to decode
|
||||||
|
|
||||||
|
href = eval_xpath_getindex(result, './article/a/@href', 0)
|
||||||
|
href = href.split('?')[0]
|
||||||
|
href = href.split('/')[-1]
|
||||||
|
href = base64.urlsafe_b64decode(href + '====')
|
||||||
|
href = href[href.index(b'http') :].split(b'\xd2')[0]
|
||||||
|
href = href.decode()
|
||||||
|
|
||||||
|
title = extract_text(eval_xpath(result, './article/h3[1]'))
|
||||||
|
|
||||||
|
        # The pub_date is mostly a string like 'yesterday', not a real
|
||||||
|
# timezone date or time. Therefore we can't use publishedDate.
|
||||||
|
pub_date = extract_text(eval_xpath(result, './article//time'))
|
||||||
|
pub_origin = extract_text(eval_xpath(result, './article//a[@data-n-tid]'))
|
||||||
|
|
||||||
|
content = ' / '.join([x for x in [pub_origin, pub_date] if x])
|
||||||
|
|
||||||
|
# The image URL is located in a preceding sibling <img> tag, e.g.:
|
||||||
|
# "https://lh3.googleusercontent.com/DjhQh7DMszk.....z=-p-h100-w100"
|
||||||
|
        # These URLs are long but not personalized (double checked via Tor).
|
||||||
|
|
||||||
|
img_src = extract_text(result.xpath('preceding-sibling::a/figure/img/@src'))
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': href,
|
||||||
|
'title': title,
|
||||||
|
'content': content,
|
||||||
|
'img_src': img_src,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# return results
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
ceid_list = [
|
||||||
|
'AE:ar',
|
||||||
|
'AR:es-419',
|
||||||
|
'AT:de',
|
||||||
|
'AU:en',
|
||||||
|
'BD:bn',
|
||||||
|
'BE:fr',
|
||||||
|
'BE:nl',
|
||||||
|
'BG:bg',
|
||||||
|
'BR:pt-419',
|
||||||
|
'BW:en',
|
||||||
|
'CA:en',
|
||||||
|
'CA:fr',
|
||||||
|
'CH:de',
|
||||||
|
'CH:fr',
|
||||||
|
'CL:es-419',
|
||||||
|
'CN:zh-Hans',
|
||||||
|
'CO:es-419',
|
||||||
|
'CU:es-419',
|
||||||
|
'CZ:cs',
|
||||||
|
'DE:de',
|
||||||
|
'EG:ar',
|
||||||
|
'ES:es',
|
||||||
|
'ET:en',
|
||||||
|
'FR:fr',
|
||||||
|
'GB:en',
|
||||||
|
'GH:en',
|
||||||
|
'GR:el',
|
||||||
|
'HK:zh-Hant',
|
||||||
|
'HU:hu',
|
||||||
|
'ID:en',
|
||||||
|
'ID:id',
|
||||||
|
'IE:en',
|
||||||
|
'IL:en',
|
||||||
|
'IL:he',
|
||||||
|
'IN:bn',
|
||||||
|
'IN:en',
|
||||||
|
'IN:hi',
|
||||||
|
'IN:ml',
|
||||||
|
'IN:mr',
|
||||||
|
'IN:ta',
|
||||||
|
'IN:te',
|
||||||
|
'IT:it',
|
||||||
|
'JP:ja',
|
||||||
|
'KE:en',
|
||||||
|
'KR:ko',
|
||||||
|
'LB:ar',
|
||||||
|
'LT:lt',
|
||||||
|
'LV:en',
|
||||||
|
'LV:lv',
|
||||||
|
'MA:fr',
|
||||||
|
'MX:es-419',
|
||||||
|
'MY:en',
|
||||||
|
'NA:en',
|
||||||
|
'NG:en',
|
||||||
|
'NL:nl',
|
||||||
|
'NO:no',
|
||||||
|
'NZ:en',
|
||||||
|
'PE:es-419',
|
||||||
|
'PH:en',
|
||||||
|
'PK:en',
|
||||||
|
'PL:pl',
|
||||||
|
'PT:pt-150',
|
||||||
|
'RO:ro',
|
||||||
|
'RS:sr',
|
||||||
|
'RU:ru',
|
||||||
|
'SA:ar',
|
||||||
|
'SE:sv',
|
||||||
|
'SG:en',
|
||||||
|
'SI:sl',
|
||||||
|
'SK:sk',
|
||||||
|
'SN:fr',
|
||||||
|
'TH:th',
|
||||||
|
'TR:tr',
|
||||||
|
'TW:zh-Hant',
|
||||||
|
'TZ:en',
|
||||||
|
'UA:ru',
|
||||||
|
'UA:uk',
|
||||||
|
'UG:en',
|
||||||
|
'US:en',
|
||||||
|
'US:es-419',
|
||||||
|
'VE:es-419',
|
||||||
|
'VN:vi',
|
||||||
|
'ZA:en',
|
||||||
|
'ZW:en',
|
||||||
|
]
|
||||||
|
"""List of region/language combinations supported by Google News. Values of the
|
||||||
|
``ceid`` argument of the Google News REST API."""
|
||||||
|
|
||||||
|
|
||||||
|
_skip_values = [
|
||||||
|
'ET:en', # english (ethiopia)
|
||||||
|
'ID:en', # english (indonesia)
|
||||||
|
'LV:en', # english (latvia)
|
||||||
|
]
|
||||||
|
|
||||||
|
_ceid_locale_map = {'NO:no': 'nb-NO'}
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_traits(engine_traits: EngineTraits):
|
||||||
|
_fetch_traits(engine_traits, add_domains=False)
|
||||||
|
|
||||||
|
engine_traits.custom['ceid'] = {}
|
||||||
|
|
||||||
|
for ceid in ceid_list:
|
||||||
|
if ceid in _skip_values:
|
||||||
|
continue
|
||||||
|
|
||||||
|
region, lang = ceid.split(':')
|
||||||
|
x = lang.split('-')
|
||||||
|
if len(x) > 1:
|
||||||
|
if x[1] not in ['Hant', 'Hans']:
|
||||||
|
lang = x[0]
|
||||||
|
|
||||||
|
sxng_locale = _ceid_locale_map.get(ceid, lang + '-' + region)
|
||||||
|
try:
|
||||||
|
locale = babel.Locale.parse(sxng_locale, sep='-')
|
||||||
|
except babel.UnknownLocaleError:
|
||||||
|
print("ERROR: %s -> %s is unknown by babel" % (ceid, sxng_locale))
|
||||||
|
continue
|
||||||
|
|
||||||
|
engine_traits.custom['ceid'][locales.region_tag(locale)] = ceid
|
||||||
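# --- illustration (not part of the engine) -------------------------------
# The article-link unwrapping in response() is the least obvious step; the
# sketch below rebuilds it on a synthetic href (the protobuf-like payload is
# fabricated, real Google News tokens differ).
_payload = b'\x08\x13"' + b'https://example.org/article' + b'\xd2\x01\x00'
_token = base64.urlsafe_b64encode(_payload).decode()
_href = './articles/' + _token + '?hl=en-US'

_href = _href.split('?')[0]                       # drop the query string
_href = _href.split('/')[-1]                      # keep only the base64 token
_raw = base64.urlsafe_b64decode(_href + '====')   # '====' pads short tokens
print(_raw[_raw.index(b'http'):].split(b'\xd2')[0].decode())  # https://example.org/article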
Executable
+116
@@ -0,0 +1,116 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""Google Play Apps & Google Play Movies
|
||||||
|
"""
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
from searx.utils import (
|
||||||
|
eval_xpath,
|
||||||
|
extract_url,
|
||||||
|
extract_text,
|
||||||
|
eval_xpath_list,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
)
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": "https://play.google.com/",
|
||||||
|
"wikidata_id": "Q79576",
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": "HTML",
|
||||||
|
}
|
||||||
|
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
play_categ = None # apps|movies
|
||||||
|
base_url = 'https://play.google.com'
|
||||||
|
search_url = base_url + "/store/search?{query}&c={play_categ}"
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
|
||||||
|
if play_categ not in ('movies', 'apps'):
|
||||||
|
raise ValueError(f"unknown google play category: {play_categ}")
|
||||||
|
|
||||||
|
params["url"] = search_url.format(
|
||||||
|
query=urlencode({"q": query}),
|
||||||
|
play_categ=play_categ,
|
||||||
|
)
|
||||||
|
params['cookies']['CONSENT'] = "YES+"
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
|
||||||
|
if play_categ == 'movies':
|
||||||
|
return response_movies(resp)
|
||||||
|
if play_categ == 'apps':
|
||||||
|
return response_apps(resp)
|
||||||
|
|
||||||
|
raise ValueError(f"Unsupported play category: {play_categ}")
|
||||||
|
|
||||||
|
|
||||||
|
def response_movies(resp):
|
||||||
|
|
||||||
|
results = []
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
for section in eval_xpath(dom, '//c-wiz/section/header/..'):
|
||||||
|
sec_name = extract_text(eval_xpath(section, './header'))
|
||||||
|
for item in eval_xpath(section, './/a'):
|
||||||
|
url = base_url + item.get('href')
|
||||||
|
div_1, div_2 = eval_xpath(item, './div')[:2]
|
||||||
|
title = extract_text(eval_xpath(div_2, './div[@title]'))
|
||||||
|
metadata = extract_text(eval_xpath(div_2, './div[@class]'))
|
||||||
|
img = eval_xpath(div_1, './/img')[0]
|
||||||
|
img_src = img.get('src')
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"url": url,
|
||||||
|
"title": title,
|
||||||
|
"content": sec_name,
|
||||||
|
"img_src": img_src,
|
||||||
|
'metadata': metadata,
|
||||||
|
'template': 'videos.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def response_apps(resp):
|
||||||
|
|
||||||
|
results = []
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
if eval_xpath(dom, '//div[@class="v6DsQb"]'):
|
||||||
|
return []
|
||||||
|
|
||||||
|
spot = eval_xpath_getindex(dom, '//div[@class="ipRz4"]', 0, None)
|
||||||
|
if spot is not None:
|
||||||
|
url = extract_url(eval_xpath(spot, './a[@class="Qfxief"]/@href'), search_url)
|
||||||
|
title = extract_text(eval_xpath(spot, './/div[@class="vWM94c"]'))
|
||||||
|
content = extract_text(eval_xpath(spot, './/div[@class="LbQbAe"]'))
|
||||||
|
img = extract_text(eval_xpath(spot, './/img[@class="T75of bzqKMd"]/@src'))
|
||||||
|
|
||||||
|
results.append({"url": url, "title": title, "content": content, "img_src": img})
|
||||||
|
|
||||||
|
more = eval_xpath_list(dom, '//c-wiz[@jsrenderer="RBsfwb"]//div[@role="listitem"]', min_len=1)
|
||||||
|
for result in more:
|
||||||
|
url = extract_url(eval_xpath(result, ".//a/@href"), search_url)
|
||||||
|
title = extract_text(eval_xpath(result, './/span[@class="DdYX5"]'))
|
||||||
|
content = extract_text(eval_xpath(result, './/span[@class="wMUdtb"]'))
|
||||||
|
img = extract_text(
|
||||||
|
eval_xpath(
|
||||||
|
result,
|
||||||
|
'.//img[@class="T75of stzEZd" or @class="T75of etjhNc Q8CSx "]/@src',
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
results.append({"url": url, "title": title, "content": content, "img_src": img})
|
||||||
|
|
||||||
|
for suggestion in eval_xpath_list(dom, '//c-wiz[@jsrenderer="qyd4Kb"]//div[@class="ULeU3b neq64b"]'):
|
||||||
|
results.append({"suggestion": extract_text(eval_xpath(suggestion, './/div[@class="Epkrse "]'))})
|
||||||
|
|
||||||
|
return results
|
||||||
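# --- illustration (not part of the engine) -------------------------------
# play_categ is left as None above and is expected to be set from the engine
# configuration; the assignment below is only a sketch of the URL request()
# builds for an 'apps' search (query term is an arbitrary example).
play_categ = 'apps'
print(request('podcast player', {'cookies': {}})['url'])
# -> https://play.google.com/store/search?q=podcast+player&c=apps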
Executable
+217
@@ -0,0 +1,217 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This is the implementation of the Google Scholar engine.
|
||||||
|
|
||||||
|
Compared to other Google services, the Scholar engine has a simple GET REST-API
and there is no `async` API.  Even though the API is slightly vintage, we can
make use of the :ref:`google API` to assemble the arguments of the GET request.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from datetime import datetime
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.utils import (
|
||||||
|
eval_xpath,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
eval_xpath_list,
|
||||||
|
extract_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
from searx.exceptions import SearxEngineCaptchaException
|
||||||
|
|
||||||
|
from searx.engines.google import fetch_traits # pylint: disable=unused-import
|
||||||
|
from searx.engines.google import (
|
||||||
|
get_google_info,
|
||||||
|
time_range_dict,
|
||||||
|
)
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://scholar.google.com',
|
||||||
|
"wikidata_id": 'Q494817',
|
||||||
|
"official_api_documentation": 'https://developers.google.com/custom-search',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
categories = ['science', 'scientific publications']
|
||||||
|
paging = True
|
||||||
|
language_support = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = False
|
||||||
|
send_accept_language_header = True
|
||||||
|
|
||||||
|
|
||||||
|
def time_range_args(params):
|
||||||
|
"""Returns a dictionary with a time range arguments based on
|
||||||
|
``params['time_range']``.
|
||||||
|
|
||||||
|
Google Scholar supports a detailed search by year. Searching by *last
|
||||||
|
month* or *last week* (as offered by SearXNG) is uncommon for scientific
|
||||||
|
publications and is not supported by Google Scholar.
|
||||||
|
|
||||||
|
    To limit the result list when the user selects a range, all the SearXNG
    ranges (*day*, *week*, *month*, *year*) are mapped to *year*.  If no range
    is set, an empty dictionary of arguments is returned.  Example: when the
    user selects a time range (current year minus one in 2022):
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
{ 'as_ylo' : 2021 }
|
||||||
|
|
||||||
|
"""
|
||||||
|
ret_val = {}
|
||||||
|
if params['time_range'] in time_range_dict:
|
||||||
|
ret_val['as_ylo'] = datetime.now().year - 1
|
||||||
|
return ret_val
|
||||||
|
|
||||||
|
|
||||||
|
def detect_google_captcha(dom):
|
||||||
|
"""In case of CAPTCHA Google Scholar open its own *not a Robot* dialog and is
|
||||||
|
not redirected to ``sorry.google.com``.
|
||||||
|
"""
|
||||||
|
if eval_xpath(dom, "//form[@id='gs_captcha_f']"):
|
||||||
|
raise SearxEngineCaptchaException()
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Google-Scholar search request"""
|
||||||
|
|
||||||
|
google_info = get_google_info(params, traits)
|
||||||
|
# subdomain is: scholar.google.xy
|
||||||
|
google_info['subdomain'] = google_info['subdomain'].replace("www.", "scholar.")
|
||||||
|
|
||||||
|
args = {
|
||||||
|
'q': query,
|
||||||
|
**google_info['params'],
|
||||||
|
'start': (params['pageno'] - 1) * 10,
|
||||||
|
'as_sdt': '2007', # include patents / to disable set '0,5'
|
||||||
|
'as_vis': '0', # include citations / to disable set '1'
|
||||||
|
}
|
||||||
|
args.update(time_range_args(params))
|
||||||
|
|
||||||
|
params['url'] = 'https://' + google_info['subdomain'] + '/scholar?' + urlencode(args)
|
||||||
|
params['cookies'] = google_info['cookies']
|
||||||
|
params['headers'].update(google_info['headers'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def parse_gs_a(text: Optional[str]):
|
||||||
|
"""Parse the text written in green.
|
||||||
|
|
||||||
|
Possible formats:
|
||||||
|
* "{authors} - {journal}, {year} - {publisher}"
|
||||||
|
* "{authors} - {year} - {publisher}"
|
||||||
|
* "{authors} - {publisher}"
|
||||||
|
"""
|
||||||
|
if text is None or text == "":
|
||||||
|
return None, None, None, None
|
||||||
|
|
||||||
|
s_text = text.split(' - ')
|
||||||
|
authors = s_text[0].split(', ')
|
||||||
|
publisher = s_text[-1]
|
||||||
|
if len(s_text) != 3:
|
||||||
|
return authors, None, publisher, None
|
||||||
|
|
||||||
|
# the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}"
|
||||||
|
# get journal and year
|
||||||
|
journal_year = s_text[1].split(', ')
|
||||||
|
    # journal is optional and may contain commas
|
||||||
|
if len(journal_year) > 1:
|
||||||
|
journal = ', '.join(journal_year[0:-1])
|
||||||
|
if journal == '…':
|
||||||
|
journal = None
|
||||||
|
else:
|
||||||
|
journal = None
|
||||||
|
# year
|
||||||
|
year = journal_year[-1]
|
||||||
|
try:
|
||||||
|
publishedDate = datetime.strptime(year.strip(), '%Y')
|
||||||
|
except ValueError:
|
||||||
|
publishedDate = None
|
||||||
|
return authors, journal, publisher, publishedDate
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp): # pylint: disable=too-many-locals
|
||||||
|
"""Parse response from Google Scholar"""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
# convert the text to dom
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
detect_google_captcha(dom)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for result in eval_xpath_list(dom, '//div[@data-rp]'):
|
||||||
|
|
||||||
|
title = extract_text(eval_xpath(result, './/h3[1]//a'))
|
||||||
|
|
||||||
|
if not title:
|
||||||
|
# this is a [ZITATION] block
|
||||||
|
continue
|
||||||
|
|
||||||
|
pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
|
||||||
|
if pub_type:
|
||||||
|
pub_type = pub_type[1:-1].lower()
|
||||||
|
|
||||||
|
url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
|
||||||
|
content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
|
||||||
|
authors, journal, publisher, publishedDate = parse_gs_a(
|
||||||
|
extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
|
||||||
|
)
|
||||||
|
if publisher in url:
|
||||||
|
publisher = None
|
||||||
|
|
||||||
|
# cited by
|
||||||
|
comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))
|
||||||
|
|
||||||
|
# link to the html or pdf document
|
||||||
|
html_url = None
|
||||||
|
pdf_url = None
|
||||||
|
doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
|
||||||
|
doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
|
||||||
|
if doc_type == "[PDF]":
|
||||||
|
pdf_url = doc_url
|
||||||
|
else:
|
||||||
|
html_url = doc_url
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'template': 'paper.html',
|
||||||
|
'type': pub_type,
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'authors': authors,
|
||||||
|
'publisher': publisher,
|
||||||
|
'journal': journal,
|
||||||
|
'publishedDate': publishedDate,
|
||||||
|
'content': content,
|
||||||
|
'comments': comments,
|
||||||
|
'html_url': html_url,
|
||||||
|
'pdf_url': pdf_url,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# parse suggestion
|
||||||
|
for suggestion in eval_xpath(dom, '//div[contains(@class, "gs_qsuggest_wrap")]//li//a'):
|
||||||
|
# append suggestion
|
||||||
|
results.append({'suggestion': extract_text(suggestion)})
|
||||||
|
|
||||||
|
for correction in eval_xpath(dom, '//div[@class="gs_r gs_pda"]/a'):
|
||||||
|
results.append({'correction': extract_text(correction)})
|
||||||
|
|
||||||
|
return results
|
||||||
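# --- illustration (not part of the engine) -------------------------------
# A quick exercise of the three 'gs_a' formats handled by parse_gs_a(); the
# strings are made-up examples following the patterns in its docstring.
for _gs_a in (
    'J Doe, A Smith - Journal of Examples, 2021 - examplepress.org',
    'J Doe - 2019 - examplepress.org',
    'J Doe - examplepress.org',
):
    print(parse_gs_a(_gs_a))  # (authors, journal, publisher, publishedDate)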
Executable
+139
@@ -0,0 +1,139 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""This is the implementation of the Google Videos engine.
|
||||||
|
|
||||||
|
.. admonition:: Content-Security-Policy (CSP)
|
||||||
|
|
||||||
|
This engine needs to allow images from the `data URLs`_ (prefixed with the
|
||||||
|
``data:`` scheme)::
|
||||||
|
|
||||||
|
Header set Content-Security-Policy "img-src 'self' data: ;"
|
||||||
|
|
||||||
|
.. _data URLs:
|
||||||
|
https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
from searx.utils import (
|
||||||
|
eval_xpath,
|
||||||
|
eval_xpath_list,
|
||||||
|
eval_xpath_getindex,
|
||||||
|
extract_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
from searx.engines.google import fetch_traits # pylint: disable=unused-import
|
||||||
|
from searx.engines.google import (
|
||||||
|
get_google_info,
|
||||||
|
time_range_dict,
|
||||||
|
filter_mapping,
|
||||||
|
suggestion_xpath,
|
||||||
|
detect_google_sorry,
|
||||||
|
)
|
||||||
|
from searx.enginelib.traits import EngineTraits
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
traits: EngineTraits
|
||||||
|
|
||||||
|
# about
|
||||||
|
about = {
|
||||||
|
"website": 'https://www.google.com',
|
||||||
|
"wikidata_id": 'Q219885',
|
||||||
|
"official_api_documentation": 'https://developers.google.com/custom-search',
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
# engine dependent config
|
||||||
|
|
||||||
|
categories = ['videos', 'web']
|
||||||
|
paging = True
|
||||||
|
language_support = True
|
||||||
|
time_range_support = True
|
||||||
|
safesearch = True
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
|
||||||
|
"""Google-Video search request"""
|
||||||
|
|
||||||
|
google_info = get_google_info(params, traits)
|
||||||
|
|
||||||
|
query_url = (
|
||||||
|
'https://'
|
||||||
|
+ google_info['subdomain']
|
||||||
|
+ '/search'
|
||||||
|
+ "?"
|
||||||
|
+ urlencode(
|
||||||
|
{
|
||||||
|
'q': query,
|
||||||
|
'tbm': "vid",
|
||||||
|
'start': 10 * params['pageno'],
|
||||||
|
**google_info['params'],
|
||||||
|
'asearch': 'arc',
|
||||||
|
'async': 'use_ac:true,_fmt:html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if params['time_range'] in time_range_dict:
|
||||||
|
query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
|
||||||
|
if params['safesearch']:
|
||||||
|
query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
|
||||||
|
params['url'] = query_url
|
||||||
|
|
||||||
|
params['cookies'] = google_info['cookies']
|
||||||
|
params['headers'].update(google_info['headers'])
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
|
||||||
|
"""Get response from google's search request"""
|
||||||
|
results = []
|
||||||
|
|
||||||
|
detect_google_sorry(resp)
|
||||||
|
|
||||||
|
# convert the text to dom
|
||||||
|
dom = html.fromstring(resp.text)
|
||||||
|
|
||||||
|
# parse results
|
||||||
|
for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
|
||||||
|
|
||||||
|
img_src = eval_xpath_getindex(result, './/img/@src', 0, None)
|
||||||
|
if img_src is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
title = extract_text(eval_xpath_getindex(result, './/a/h3[1]', 0))
|
||||||
|
url = eval_xpath_getindex(result, './/a/h3[1]/../@href', 0)
|
||||||
|
|
||||||
|
c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
|
||||||
|
content = extract_text(c_node)
|
||||||
|
pub_info = extract_text(eval_xpath(result, './/div[@class="P7xzyf"]'))
|
||||||
|
length = extract_text(eval_xpath(result, './/div[@class="J1mWY"]'))
|
||||||
|
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
'url': url,
|
||||||
|
'title': title,
|
||||||
|
'content': content,
|
||||||
|
'author': pub_info,
|
||||||
|
'thumbnail': img_src,
|
||||||
|
'length': length,
|
||||||
|
'template': 'videos.html',
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# parse suggestion
|
||||||
|
for suggestion in eval_xpath_list(dom, suggestion_xpath):
|
||||||
|
# append suggestion
|
||||||
|
results.append({'suggestion': extract_text(suggestion)})
|
||||||
|
|
||||||
|
return results
|
||||||
Executable
+99
@@ -0,0 +1,99 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
|
||||||
|
"""IMDB - Internet Movie Database
|
||||||
|
|
||||||
|
Retrieves results from a basic search.  Advanced search options are not
supported.  IMDb's API is undocumented; here are some posts about it:
|
||||||
|
|
||||||
|
- https://stackoverflow.com/questions/1966503/does-imdb-provide-an-api
|
||||||
|
- https://rapidapi.com/blog/how-to-use-imdb-api/
|
||||||
|
|
||||||
|
An alternative that needs IMDPro_ is `IMDb and Box Office Mojo
|
||||||
|
<https://developer.imdb.com/documentation>`_
|
||||||
|
|
||||||
|
.. __IMDPro: https://pro.imdb.com/login
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
about = {
|
||||||
|
"website": 'https://imdb.com/',
|
||||||
|
"wikidata_id": 'Q37312',
|
||||||
|
"official_api_documentation": None,
|
||||||
|
"use_official_api": False,
|
||||||
|
"require_api_key": False,
|
||||||
|
"results": 'HTML',
|
||||||
|
}
|
||||||
|
|
||||||
|
categories = []
|
||||||
|
paging = False
|
||||||
|
|
||||||
|
# suggestion_url = "https://sg.media-imdb.com/suggestion/{letter}/{query}.json"
|
||||||
|
suggestion_url = "https://v2.sg.media-imdb.com/suggestion/{letter}/{query}.json"
|
||||||
|
|
||||||
|
href_base = 'https://imdb.com/{category}/{entry_id}'
|
||||||
|
|
||||||
|
search_categories = {"nm": "name", "tt": "title", "kw": "keyword", "co": "company", "ep": "episode"}
|
||||||
|
|
||||||
|
|
||||||
|
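# For illustration only (hand-written sample, not captured from IMDB): the suggestion
# endpoint answers with JSON whose 'd' list holds entries carrying the fields used in
# response() below, e.g.
#
#   {"d": [{"id": "tt0133093",
#           "l": "The Matrix",
#           "q": "feature",
#           "rank": 238,
#           "y": 1999,
#           "s": "Keanu Reeves, Laurence Fishburne",
#           "i": {"imageUrl": "https://m.media-amazon.com/images/M/example._V1_.jpg"}}]}
#
# The two-letter prefix of 'id' selects the result category via search_categories above.
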
def request(query, params):

    query = query.replace(" ", "_").lower()
    params['url'] = suggestion_url.format(letter=query[0], query=query)

    return params


def response(resp):

    suggestions = json.loads(resp.text)
    results = []

    for entry in suggestions.get('d', []):

        # https://developer.imdb.com/documentation/key-concepts#imdb-ids
        entry_id = entry['id']
        categ = search_categories.get(entry_id[:2])
        if categ is None:
            # 'logger' is injected into the engine module by searx's engine loader
            logger.error('skip unknown category tag %s in %s', entry_id[:2], entry_id)
            continue

        title = entry['l']
        if 'q' in entry:
            title += " (%s)" % entry['q']

        content = ''
        if 'rank' in entry:
            content += "(%s) " % entry['rank']
        if 'y' in entry:
            content += str(entry['y']) + " - "
        if 's' in entry:
            content += entry['s']

        # imageUrl is the image itself, it is not a thumb!
        image_url = entry.get('i', {}).get('imageUrl')
        if image_url:
            # get thumbnail
            image_url_name, image_url_prefix = image_url.rsplit('.', 1)
            # recipe to get the magic value:
            #  * search on imdb.com, look at the URL of the thumbnail on the right side of the screen
            #  * search using the imdb engine, compare the imageUrl and thumbnail URL
            #  QL75 : JPEG quality (?)
            #  UX280 : resize to width 280
            #  280,414 : size of the image (add white border)
            # e.g. (illustrative) ".../example._V1_.jpg" becomes ".../example._V1_QL75_UX280_CR0,0,280,414_.jpg"
            magic = 'QL75_UX280_CR0,0,280,414_'
            if not image_url_name.endswith('_V1_'):
                magic = '_V1_' + magic
            image_url = image_url_name + magic + '.' + image_url_prefix
        results.append(
            {
                "title": title,
                "url": href_base.format(category=categ, entry_id=entry_id),
                "content": content,
                "img_src": image_url,
            }
        )

    return results
Executable
+75
@@ -0,0 +1,75 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 INA (Videos)
"""

from html import unescape
from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": 'https://www.ina.fr/',
    "wikidata_id": 'Q1665109',
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
    "language": 'fr',
}

# engine dependent config
categories = ['videos']
paging = True
page_size = 12

# search-url
base_url = 'https://www.ina.fr'
search_url = base_url + '/ajax/recherche?{query}&espace=1&sort=pertinence&order=desc&offset={start}&modified=size'
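# For illustration only: with params['pageno'] == 1 and the query "de gaulle", request()
# below fills this template roughly as
#   https://www.ina.fr/ajax/recherche?q=de+gaulle&espace=1&sort=pertinence&order=desc&offset=12&modified=size
# (offset = pageno * page_size; the query string is produced by urlencode({'q': query})).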

# specific xpath variables
results_xpath = '//div[@id="searchHits"]/div'
url_xpath = './/a/@href'
title_xpath = './/div[contains(@class,"title-bloc-small")]'
content_xpath = './/div[contains(@class,"sous-titre-fonction")]'
thumbnail_xpath = './/img/@data-src'
publishedDate_xpath = './/div[contains(@class,"dateAgenda")]'


# do search-request
def request(query, params):
    params['url'] = search_url.format(start=params['pageno'] * page_size, query=urlencode({'q': query}))
    return params


# get response from search-request
def response(resp):
    results = []

    # parse the HTML response into a DOM
    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, results_xpath):
        url_relative = eval_xpath_getindex(result, url_xpath, 0)
        url = base_url + url_relative
        title = unescape(extract_text(eval_xpath(result, title_xpath)))
        thumbnail = extract_text(eval_xpath(result, thumbnail_xpath))
        content = extract_text(eval_xpath(result, publishedDate_xpath)) + extract_text(
            eval_xpath(result, content_xpath)
        )

        # append result
        results.append(
            {
                'url': url,
                'title': title,
                'content': content,
                'template': 'videos.html',
                'thumbnail': thumbnail,
            }
        )

    # return results
    return results
Executable
+99
@@ -0,0 +1,99 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Invidious (Videos)
"""

import time
import random
from urllib.parse import quote_plus
from dateutil import parser

# about
about = {
    "website": 'https://api.invidious.io/',
    "wikidata_id": 'Q79343316',
    "official_api_documentation": 'https://github.com/iv-org/documentation/blob/master/API.md',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ["videos", "music"]
paging = True
time_range_support = True

# base_url can be overwritten by a list of URLs in the settings.yml
base_url = 'https://vid.puffyan.us'
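# A minimal sketch of such a settings.yml entry (illustrative only; the instance URLs and
# the shortcut are example values, not part of this commit):
#
#   - name: invidious
#     engine: invidious
#     shortcut: iv
#     base_url:
#       - https://vid.puffyan.us
#       - https://invidious.example.org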


def request(query, params):
    time_range_dict = {
        "day": "today",
        "week": "week",
        "month": "month",
        "year": "year",
    }

    if isinstance(base_url, list):
        params["base_url"] = random.choice(base_url)
    else:
        params["base_url"] = base_url

    search_url = params["base_url"] + "/api/v1/search?q={query}"
    params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"])

    if params["time_range"] in time_range_dict:
        params["url"] += "&date={timerange}".format(timerange=time_range_dict[params["time_range"]])

    if params["language"] != "all":
        lang = params["language"].split("-")
        if len(lang) == 2:
            params["url"] += "&range={lrange}".format(lrange=lang[1])

    return params

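# For illustration only: with the default base_url, query "test", page 1, time range "week"
# and language "en-US", the request above ends up roughly as
#   https://vid.puffyan.us/api/v1/search?q=test&page=1&date=week&range=US
# (parameter names follow the format strings used in request() above).
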
def response(resp):
    results = []

    search_results = resp.json()
    base_invidious_url = resp.search_params['base_url'] + "/watch?v="

    for result in search_results:
        rtype = result.get("type", None)
        if rtype == "video":
            videoid = result.get("videoId", None)
            if not videoid:
                continue

            url = base_invidious_url + videoid
            thumbs = result.get("videoThumbnails", [])
            thumb = next((th for th in thumbs if th["quality"] == "sddefault"), None)
            if thumb:
                thumbnail = thumb.get("url", "")
            else:
                thumbnail = ""

            publishedDate = parser.parse(time.ctime(result.get("published", 0)))
            # format the video duration, e.g. 754 seconds becomes "12:34"
            length = time.gmtime(result.get("lengthSeconds"))
            if length.tm_hour:
                length = time.strftime("%H:%M:%S", length)
            else:
                length = time.strftime("%M:%S", length)

            results.append(
                {
                    "url": url,
                    "title": result.get("title", ""),
                    "content": result.get("description", ""),
                    'length': length,
                    "template": "videos.html",
                    "author": result.get("author"),
                    "publishedDate": publishedDate,
                    "iframe_src": resp.search_params['base_url'] + '/embed/' + videoid,
                    "thumbnail": thumbnail,
                }
            )

    return results
Some files were not shown because too many files have changed in this diff