Ver a proveniência

Add DuckDuckGo search engine, add `extra_deps`

pull/79/head
TheresNoTime há 1 ano
ascendente
cometimento
b3ef06c0f5
Não foi encontrada uma chave conhecida para esta assinatura, na base de dados ID da chave GPG: 2C15A644ABE9A27E
2 ficheiros alterados com 21 adições e 2 eliminações
  1. +20
    -2
      earwigbot/wiki/copyvios/search.py
  2. +1
    -0
      setup.py

+ 20
- 2
earwigbot/wiki/copyvios/search.py Ver ficheiro

@@ -27,13 +27,14 @@ from socket import error
from io import StringIO
from urllib.parse import quote, urlencode
from urllib.error import URLError
from duckduckgo_search import ddg

from earwigbot import importer
from earwigbot.exceptions import SearchQueryError

lxml = importer.new("lxml")

__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "SEARCH_ENGINES"]
__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "DDGSearchEngine", "SEARCH_ENGINES"]

class _BaseSearchEngine:
"""Base class for a simple search engine interface."""
@@ -203,9 +204,26 @@ class YandexSearchEngine(_BaseSearchEngine):
except lxml.etree.Error as exc:
raise SearchQueryError("Yandex XML parse error: " + str(exc))

class DDGSearchEngine(_BaseSearchEngine):
    """A search engine interface with DuckDuckGo."""
    name = "DDG"

    def search(self, query):
        """Do a DuckDuckGo web search for *query*.

        Returns a list of URLs ranked by relevance (as determined by
        DuckDuckGo). An empty list is returned when the search yields no
        usable results.
        """
        # Safe search is disabled and at most 200 results are requested;
        # time='y' presumably limits results to the past year (see the
        # duckduckgo_search documentation).
        results = ddg(query, safesearch='Off', time='y', max_results=200)

        # ddg() may hand back None instead of an empty list (e.g. when there
        # is nothing to return); iterating None would raise a TypeError.
        if not results:
            return []

        # Skip entries lacking an "href" individually, so one malformed item
        # does not discard every other valid URL.
        return [item["href"] for item in results if "href" in item]


SEARCH_ENGINES = {
"Bing": BingSearchEngine,
"Google": GoogleSearchEngine,
"Yandex": YandexSearchEngine
"Yandex": YandexSearchEngine,
"DDG": DDGSearchEngine
}

+ 1
- 0
setup.py Ver ficheiro

@@ -46,6 +46,7 @@ extra_deps = {
"nltk >= 3.6.1", # Parsing sentences to split article content
"pdfminer >= 20191125", # Extracting text from PDF files
"tldextract >= 3.1.0", # Getting domains for the multithreaded workers
"duckduckgo-search == 2.8.5", # DuckDuckGo search engine
],
"time": [
"pytz >= 2021.1", # Handling timezones for the !time IRC command


Carregando…
Cancelar
Guardar