Browse Source

Add DuckDuckGo search engine, add `extra_deps`

pull/79/head
TheresNoTime 1 year ago
parent
commit
b3ef06c0f5
No known key found for this signature in database GPG Key ID: 2C15A644ABE9A27E
2 changed files with 21 additions and 2 deletions
  1. +20
    -2
      earwigbot/wiki/copyvios/search.py
  2. +1
    -0
      setup.py

+ 20
- 2
earwigbot/wiki/copyvios/search.py View File

@@ -27,13 +27,14 @@ from socket import error
from io import StringIO from io import StringIO
from urllib.parse import quote, urlencode from urllib.parse import quote, urlencode
from urllib.error import URLError from urllib.error import URLError
from duckduckgo_search import ddg


from earwigbot import importer from earwigbot import importer
from earwigbot.exceptions import SearchQueryError from earwigbot.exceptions import SearchQueryError


lxml = importer.new("lxml") lxml = importer.new("lxml")


__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "SEARCH_ENGINES"]
__all__ = ["BingSearchEngine", "GoogleSearchEngine", "YandexSearchEngine", "DDGSearchEngine", "SEARCH_ENGINES"]


class _BaseSearchEngine: class _BaseSearchEngine:
"""Base class for a simple search engine interface.""" """Base class for a simple search engine interface."""
@@ -203,9 +204,26 @@ class YandexSearchEngine(_BaseSearchEngine):
except lxml.etree.Error as exc: except lxml.etree.Error as exc:
raise SearchQueryError("Yandex XML parse error: " + str(exc)) raise SearchQueryError("Yandex XML parse error: " + str(exc))


class DDGSearchEngine(_BaseSearchEngine):
    """A search engine interface with DuckDuckGo."""
    name = "DDG"

    def search(self, query):
        """Do a DuckDuckGo web search for *query*.

        Returns a list of URLs ranked by relevance (as determined by
        DuckDuckGo). An empty list is returned when the search yields no
        usable results.
        """
        results = ddg(query, safesearch='Off', time='y', max_results=200)

        # NOTE(review): ddg() is believed to return None rather than an empty
        # list in some cases (e.g. an empty query) -- confirm against the
        # pinned duckduckgo-search version. Iterating None would raise a
        # TypeError, so treat any falsy result as "no results".
        if not results:
            return []

        # Skip only the entries that lack an "href" key instead of discarding
        # every URL because a single result is malformed.
        return [item["href"] for item in results if "href" in item]



SEARCH_ENGINES = { SEARCH_ENGINES = {
"Bing": BingSearchEngine, "Bing": BingSearchEngine,
"Google": GoogleSearchEngine, "Google": GoogleSearchEngine,
"Yandex": YandexSearchEngine
"Yandex": YandexSearchEngine,
"DDG": DDGSearchEngine
} }

+ 1
- 0
setup.py View File

@@ -46,6 +46,7 @@ extra_deps = {
"nltk >= 3.6.1", # Parsing sentences to split article content "nltk >= 3.6.1", # Parsing sentences to split article content
"pdfminer >= 20191125", # Extracting text from PDF files "pdfminer >= 20191125", # Extracting text from PDF files
"tldextract >= 3.1.0", # Getting domains for the multithreaded workers "tldextract >= 3.1.0", # Getting domains for the multithreaded workers
"duckduckgo-search == 2.8.5", # DuckDuckGo search engine
], ],
"time": [ "time": [
"pytz >= 2021.1", # Handling timezones for the !time IRC command "pytz >= 2021.1", # Handling timezones for the !time IRC command


Loading…
Cancel
Save