Kaynağa Gözat

Add DuckDuckGo search engine, add `extra_deps`

pull/79/head
TheresNoTime 1 yıl önce
ebeveyn
işleme
b3ef06c0f5
Veri tabanında bu imza için bilinen anahtar bulunamadı GPG Anahtar Kimliği: 2C15A644ABE9A27E
2 değiştirilmiş dosya ile 21 ekleme ve 2 silme
  1. +20
    -2
      earwigbot/wiki/copyvios/search.py
  2. +1
    -0
      setup.py

+ 20
- 2
earwigbot/wiki/copyvios/search.py Dosyayı Görüntüle

@@ -27,13 +27,14 @@ from socket import error
from io import StringIO
from urllib.parse import quote, urlencode
from urllib.error import URLError
from duckduckgo_search import ddg

from earwigbot import importer
from earwigbot.exceptions import SearchQueryError

lxml = importer.new("lxml")

__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "SEARCH_ENGINES"]
__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "DDGSearchEngine", "SEARCH_ENGINES"]

class _BaseSearchEngine:
"""Base class for a simple search engine interface."""
@@ -203,9 +204,26 @@ class YandexSearchEngine(_BaseSearchEngine):
except lxml.etree.Error as exc:
raise SearchQueryError("Yandex XML parse error: " + str(exc))

class DDGSearchEngine(_BaseSearchEngine):
    """A search engine interface with DuckDuckGo."""
    name = "DDG"

    def search(self, query):
        """Do a DuckDuckGo web search for *query*.

        Returns a list of URLs ranked by relevance (as determined by
        DuckDuckGo). An empty list is returned when the search yields no
        usable results.
        """
        result = ddg(query, safesearch='Off', time='y', max_results=200)

        # NOTE(review): ddg() appears to return None rather than an empty
        # list when nothing is found -- confirm against the pinned version.
        # Guarding here avoids a TypeError from iterating None.
        if not result:
            return []

        # Skip individual entries that lack an "href" instead of discarding
        # the whole result set because of one malformed item.
        return [item["href"] for item in result if "href" in item]


SEARCH_ENGINES = {
"Bing": BingSearchEngine,
"Google": GoogleSearchEngine,
"Yandex": YandexSearchEngine
"Yandex": YandexSearchEngine,
"DDG": DDGSearchEngine
}

+ 1
- 0
setup.py Dosyayı Görüntüle

@@ -46,6 +46,7 @@ extra_deps = {
"nltk >= 3.6.1", # Parsing sentences to split article content
"pdfminer >= 20191125", # Extracting text from PDF files
"tldextract >= 3.1.0", # Getting domains for the multithreaded workers
"duckduckgo-search == 2.8.5", # DuckDuckGo search engine
],
"time": [
"pytz >= 2021.1", # Handling timezones for the !time IRC command


Yükleniyor…
İptal
Kaydet