Quellcode durchsuchen

Add DuckDuckGo search engine, add `extra_deps`

pull/79/head
TheresNoTime vor 1 Jahr
Ursprung
Commit
b3ef06c0f5
Es konnte kein GPG-Schlüssel zu dieser Signatur gefunden werden GPG-Schlüssel-ID: 2C15A644ABE9A27E
2 geänderte Dateien mit 21 neuen und 2 gelöschten Zeilen
  1. +20
    -2
      earwigbot/wiki/copyvios/search.py
  2. +1
    -0
      setup.py

+ 20
- 2
earwigbot/wiki/copyvios/search.py Datei anzeigen

@@ -27,13 +27,14 @@ from socket import error
from io import StringIO
from urllib.parse import quote, urlencode
from urllib.error import URLError
from duckduckgo_search import ddg

from earwigbot import importer
from earwigbot.exceptions import SearchQueryError

lxml = importer.new("lxml")

__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "SEARCH_ENGINES"]
__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "DDGSearchEngine", "SEARCH_ENGINES"]

class _BaseSearchEngine:
"""Base class for a simple search engine interface."""
@@ -203,9 +204,26 @@ class YandexSearchEngine(_BaseSearchEngine):
except lxml.etree.Error as exc:
raise SearchQueryError("Yandex XML parse error: " + str(exc))

class DDGSearchEngine(_BaseSearchEngine):
    """A search engine interface with DuckDuckGo."""
    name = "DDG"

    def search(self, query):
        """Do a DuckDuckGo web search for *query*.

        Returns a list of URLs ranked by relevance (as determined by
        DuckDuckGo). An empty list is returned when the search yields no
        results.
        """
        results = ddg(query, safesearch='Off', time='y', max_results=200)

        # NOTE(review): ddg() appears to return None rather than an empty
        # list in some cases (e.g. an empty query) -- confirm against the
        # pinned duckduckgo-search version. Guarding here avoids a TypeError
        # when iterating.
        if not results:
            return []

        # Skip individual entries without a usable "href" instead of
        # discarding every result because one entry is malformed.
        return [item["href"] for item in results if item.get("href")]


SEARCH_ENGINES = {
"Bing": BingSearchEngine,
"Google": GoogleSearchEngine,
"Yandex": YandexSearchEngine
"Yandex": YandexSearchEngine,
"DDG": DDGSearchEngine
}

+ 1
- 0
setup.py Datei anzeigen

@@ -46,6 +46,7 @@ extra_deps = {
"nltk >= 3.6.1", # Parsing sentences to split article content
"pdfminer >= 20191125", # Extracting text from PDF files
"tldextract >= 3.1.0", # Getting domains for the multithreaded workers
"duckduckgo-search == 2.8.5", # DuckDuckGo search engine
],
"time": [
"pytz >= 2021.1", # Handling timezones for the !time IRC command


Laden…
Abbrechen
Speichern