diff --git a/pyproject.toml b/pyproject.toml index 10a3734..4a01eb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,6 @@ requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [tool.pyright] -exclude = [ - # TODO - "src/earwigbot/wiki/copyvios" -] pythonVersion = "3.11" venvPath = "." venv = "venv" diff --git a/src/earwigbot/wiki/copyvios/__init__.py b/src/earwigbot/wiki/copyvios/__init__.py index 4602e28..b1715e4 100644 --- a/src/earwigbot/wiki/copyvios/__init__.py +++ b/src/earwigbot/wiki/copyvios/__init__.py @@ -18,208 +18,142 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +__all__ = [ + "DEFAULT_DEGREE", + "CopyvioChecker", + "CopyvioCheckResult", + "globalize", + "localize", +] + +import functools +import logging import time -from urllib.request import build_opener +from collections.abc import Callable -from earwigbot import exceptions -from earwigbot.wiki.copyvios.markov import MarkovChain -from earwigbot.wiki.copyvios.parsers import ArticleTextParser -from earwigbot.wiki.copyvios.search import SEARCH_ENGINES +from earwigbot.wiki.copyvios.exclusions import ExclusionsDB +from earwigbot.wiki.copyvios.markov import DEFAULT_DEGREE, MarkovChain +from earwigbot.wiki.copyvios.parsers import ArticleParser, ParserArgs +from earwigbot.wiki.copyvios.result import CopyvioCheckResult +from earwigbot.wiki.copyvios.search import SearchEngine, get_search_engine from earwigbot.wiki.copyvios.workers import CopyvioWorkspace, globalize, localize +from earwigbot.wiki.page import Page -__all__ = ["CopyvioMixIn", "globalize", "localize"] - -class CopyvioMixIn: +class CopyvioChecker: """ - **EarwigBot: Wiki Toolset: Copyright Violation MixIn** + Manages the lifecycle of a copyvio check or comparison. - This is a mixin that provides two public methods, :py:meth:`copyvio_check` - and :py:meth:`copyvio_compare`. The former checks the page for copyright - violations using a search engine API, and the latter compares the page - against a given URL. Credentials for the search engine API are stored in - the :py:class:`~earwigbot.wiki.site.Site`'s config. + Created by :py:class:`~earwigbot.wiki.page.Page` and handles the implementation + details of running a check. """ - def __init__(self, site): - self._search_config = site._search_config - self._exclusions_db = self._search_config.get("exclusions_db") - self._addheaders = [ - ("User-Agent", site.user_agent), + def __init__( + self, + page: Page, + *, + min_confidence: float = 0.75, + max_time: float = 30, + degree: int = DEFAULT_DEGREE, + logger: logging.Logger | None = None, + ) -> None: + self._page = page + self._site = page.site + self._config = page.site._search_config + self._min_confidence = min_confidence + self._max_time = max_time + self._degree = degree + self._logger = logger or logging.getLogger("earwigbot.wiki") + + self._headers = [ + ("User-Agent", page.site.user_agent), ("Accept-Encoding", "gzip"), ] - def _get_search_engine(self): - """Return a function that can be called to do web searches. - - The function takes one argument, a search query, and returns a list of - URLs, ranked by importance. The underlying logic depends on the - *engine* argument within our config; for example, if *engine* is - "Yahoo! BOSS", we'll use YahooBOSSSearchEngine for querying. - - Raises UnknownSearchEngineError if the 'engine' listed in our config is - unknown to us, and UnsupportedSearchEngineError if we are missing a - required package or module, like oauth2 for "Yahoo! BOSS". 
- """ - engine = self._search_config["engine"] - if engine not in SEARCH_ENGINES: - raise exceptions.UnknownSearchEngineError(engine) - - klass = SEARCH_ENGINES[engine] - credentials = self._search_config["credentials"] - opener = build_opener() - opener.addheaders = self._addheaders - - for dep in klass.requirements(): - try: - __import__(dep).__name__ - except (ModuleNotFoundError, AttributeError): - e = "Missing a required dependency ({}) for the {} engine" - e = e.format(dep, engine) - raise exceptions.UnsupportedSearchEngineError(e) - - return klass(credentials, opener) - - def copyvio_check( - self, - min_confidence=0.75, - max_queries=15, - max_time=-1, - no_searches=False, - no_links=False, - short_circuit=True, - degree=5, - ): - """Check the page for copyright violations. - - Returns a :class:`.CopyvioCheckResult` object with information on the - results of the check. - - *min_confidence* is the minimum amount of confidence we must have in - the similarity between a source text and the article in order for us to - consider it a suspected violation. This is a number between 0 and 1. - - *max_queries* is self-explanatory; we will never make more than this - number of queries in a given check. - - *max_time* can be set to prevent copyvio checks from taking longer than - a set amount of time (generally around a minute), which can be useful - if checks are called through a web server with timeouts. We will stop - checking new URLs as soon as this limit is reached. - - Setting *no_searches* to ``True`` will cause only URLs in the wikitext - of the page to be checked; no search engine queries will be made. - Setting *no_links* to ``True`` will cause the opposite to happen: URLs - in the wikitext will be ignored; search engine queries will be made - only. Setting both of these to ``True`` is pointless. - - Normally, the checker will short-circuit if it finds a URL that meets - *min_confidence*. This behavior normally causes it to skip any - remaining URLs and web queries, but setting *short_circuit* to - ``False`` will prevent this. - - Raises :exc:`.CopyvioCheckError` or subclasses - (:exc:`.UnknownSearchEngineError`, :exc:`.SearchQueryError`, ...) on - errors. 
- """ - log = "Starting copyvio check for [[{0}]]" - self._logger.info(log.format(self.title)) - searcher = self._get_search_engine() - parser = ArticleTextParser( - self.get(), - args={"nltk_dir": self._search_config["nltk_dir"], "lang": self._site.lang}, + self._parser = ArticleParser( + self._page.get(), + lang=self._site.lang, + nltk_dir=self._config["nltk_dir"], ) - article = MarkovChain(parser.strip(), degree=degree) - parser_args = {} + self._article = MarkovChain(self._parser.strip(), degree=self._degree) - if self._exclusions_db: - self._exclusions_db.sync(self.site.name) + @functools.cached_property + def _searcher(self) -> SearchEngine: + return get_search_engine(self._config, self._headers) - def exclude(u): - return self._exclusions_db.check(self.site.name, u) + @property + def _exclusions_db(self) -> ExclusionsDB | None: + return self._config.get("exclusions_db") - parser_args["mirror_hints"] = self._exclusions_db.get_mirror_hints(self) - else: - exclude = None + def _get_exclusion_callback(self) -> Callable[[str], bool] | None: + if not self._exclusions_db: + return None + return functools.partial(self._exclusions_db.check, self._site.name) + + def run_check( + self, + *, + max_queries: int = 15, + no_searches: bool = False, + no_links: bool = False, + short_circuit: bool = True, + ) -> CopyvioCheckResult: + parser_args: ParserArgs = {} + if self._exclusions_db: + self._exclusions_db.sync(self._site.name) + mirror_hints = self._exclusions_db.get_mirror_hints(self._page) + parser_args["mirror_hints"] = mirror_hints workspace = CopyvioWorkspace( - article, - min_confidence, - max_time, - self._logger, - self._addheaders, + self._article, + min_confidence=self._min_confidence, + max_time=self._max_time, + logger=self._logger, + headers=self._headers, short_circuit=short_circuit, parser_args=parser_args, - exclude_check=exclude, - config=self._search_config, - degree=degree, + exclusion_callback=self._get_exclusion_callback(), + config=self._config, + degree=self._degree, ) - if article.size < 20: # Auto-fail very small articles - result = workspace.get_result() - self._logger.info(result.get_log_message(self.title)) - return result + if self._article.size < 20: # Auto-fail very small articles + return workspace.get_result() if not no_links: - workspace.enqueue(parser.get_links()) + workspace.enqueue(self._parser.get_links()) num_queries = 0 if not no_searches: - chunks = parser.chunk(max_queries) + chunks = self._parser.chunk(max_queries) for chunk in chunks: if short_circuit and workspace.finished: workspace.possible_miss = True break - log = "[[{0}]] -> querying {1} for {2!r}" - self._logger.debug(log.format(self.title, searcher.name, chunk)) - workspace.enqueue(searcher.search(chunk)) + self._logger.debug( + f"[[{self._page.title}]] -> querying {self._searcher.name} " + f"for {chunk!r}" + ) + workspace.enqueue(self._searcher.search(chunk)) num_queries += 1 - time.sleep(1) + time.sleep(1) # TODO: Check whether this is needed workspace.wait() - result = workspace.get_result(num_queries) - self._logger.info(result.get_log_message(self.title)) - return result - - def copyvio_compare(self, urls, min_confidence=0.75, max_time=30, degree=5): - """Check the page like :py:meth:`copyvio_check` against specific URLs. 
- - This is essentially a reduced version of :meth:`copyvio_check` - a - copyivo comparison is made using Markov chains and the result is - returned in a :class:`.CopyvioCheckResult` object - but without using a - search engine, since the suspected "violated" URL is supplied from the - start. - - Its primary use is to generate a result when the URL is retrieved from - a cache, like the one used in EarwigBot's Tool Labs site. After a - search is done, the resulting URL is stored in a cache for 72 hours so - future checks against that page will not require another set of - time-and-money-consuming search engine queries. However, the comparison - itself (which includes the article's and the source's content) cannot - be stored for data retention reasons, so a fresh comparison is made - using this function. - - Since no searching is done, neither :exc:`.UnknownSearchEngineError` - nor :exc:`.SearchQueryError` will be raised. - """ - if not isinstance(urls, list): - urls = [urls] - log = "Starting copyvio compare for [[{0}]] against {1}" - self._logger.info(log.format(self.title, ", ".join(urls))) - article = MarkovChain(ArticleTextParser(self.get()).strip(), degree=degree) + return workspace.get_result(num_queries) + + def run_compare(self, urls: list[str]) -> CopyvioCheckResult: workspace = CopyvioWorkspace( - article, - min_confidence, - max_time, - self._logger, - self._addheaders, - max_time, + self._article, + min_confidence=self._min_confidence, + max_time=self._max_time, + logger=self._logger, + headers=self._headers, + url_timeout=self._max_time, num_workers=min(len(urls), 8), short_circuit=False, - config=self._search_config, - degree=degree, + config=self._config, + degree=self._degree, ) + workspace.enqueue(urls) workspace.wait() - result = workspace.get_result() - self._logger.info(result.get_log_message(self.title)) - return result + return workspace.get_result() diff --git a/src/earwigbot/wiki/copyvios/exclusions.py b/src/earwigbot/wiki/copyvios/exclusions.py index 6634cf0..f576620 100644 --- a/src/earwigbot/wiki/copyvios/exclusions.py +++ b/src/earwigbot/wiki/copyvios/exclusions.py @@ -18,15 +18,24 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import annotations + +__all__ = ["ExclusionsDB"] + +import logging import re import sqlite3 import threading import time +import typing import urllib.parse from earwigbot import exceptions -__all__ = ["ExclusionsDB"] +if typing.TYPE_CHECKING: + from earwigbot.wiki.page import Page + from earwigbot.wiki.site import Site + from earwigbot.wiki.sitesdb import SitesDB DEFAULT_SOURCES = { "all": [ # Applies to all, but located on enwiki @@ -52,26 +61,28 @@ class ExclusionsDB: """ **EarwigBot: Wiki Toolset: Exclusions Database Manager** - Controls the :file:`exclusions.db` file, which stores URLs excluded from - copyright violation checks on account of being known mirrors, for example. + Controls the :file:`exclusions.db` file, which stores URLs excluded from copyright + violation checks on account of being known mirrors, for example. 
""" - def __init__(self, sitesdb, dbfile, logger): + def __init__(self, sitesdb: SitesDB, dbfile: str, logger: logging.Logger) -> None: self._sitesdb = sitesdb self._dbfile = dbfile self._logger = logger self._db_access_lock = threading.Lock() - def __repr__(self): + def __repr__(self) -> str: """Return the canonical string representation of the ExclusionsDB.""" - res = "ExclusionsDB(sitesdb={0!r}, dbfile={1!r}, logger={2!r})" - return res.format(self._sitesdb, self._dbfile, self._logger) + return ( + f"ExclusionsDB(sitesdb={self._sitesdb!r}, dbfile={self._dbfile!r}, " + f"logger={self._logger!r})" + ) - def __str__(self): + def __str__(self) -> str: """Return a nice string representation of the ExclusionsDB.""" return f"" - def _create(self): + def _create(self) -> None: """Initialize the exclusions database with its necessary tables.""" script = """ CREATE TABLE sources (source_sitename, source_page); @@ -79,7 +90,7 @@ class ExclusionsDB: CREATE TABLE exclusions (exclusion_sitename, exclusion_url); """ query = "INSERT INTO sources VALUES (?, ?);" - sources = [] + sources: list[tuple[str, str]] = [] for sitename, pages in DEFAULT_SOURCES.items(): for page in pages: sources.append((sitename, page)) @@ -88,9 +99,9 @@ class ExclusionsDB: conn.executescript(script) conn.executemany(query, sources) - def _load_source(self, site, source): + def _load_source(self, site: Site, source: str) -> set[str]: """Load from a specific source and return a set of URLs.""" - urls = set() + urls: set[str] = set() try: data = site.get_page(source, follow_redirects=True).get() except exceptions.PageNotFoundError: @@ -123,7 +134,7 @@ class ExclusionsDB: urls.add(url) return urls - def _update(self, sitename): + def _update(self, sitename: str) -> None: """Update the database from listed sources in the index.""" query1 = "SELECT source_page FROM sources WHERE source_sitename = ?" query2 = "SELECT exclusion_url FROM exclusions WHERE exclusion_sitename = ?" @@ -140,7 +151,7 @@ class ExclusionsDB: else: site = self._sitesdb.get_site(sitename) with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: - urls = set() + urls: set[str] = set() for (source,) in conn.execute(query1, (sitename,)): urls |= self._load_source(site, source) for (url,) in conn.execute(query2, (sitename,)): @@ -154,7 +165,7 @@ class ExclusionsDB: else: conn.execute(query7, (sitename, int(time.time()))) - def _get_last_update(self, sitename): + def _get_last_update(self, sitename: str) -> int: """Return the UNIX timestamp of the last time the db was updated.""" query = "SELECT update_time FROM updates WHERE update_sitename = ?" with self._db_access_lock, sqlite3.connect(self._dbfile) as conn: @@ -165,28 +176,34 @@ class ExclusionsDB: return 0 return result[0] if result else 0 - def sync(self, sitename, force=False): - """Update the database if it hasn't been updated recently. + def sync(self, sitename: str, force: bool = False) -> None: + """ + Update the database if it hasn't been updated recently. This updates the exclusions database for the site *sitename* and "all". - Site-specific lists are considered stale after 48 hours; global lists - after 12 hours. + Site-specific lists are considered stale after 48 hours; global lists after + 12 hours. 
""" max_staleness = 60 * 60 * (12 if sitename == "all" else 48) time_since_update = int(time.time() - self._get_last_update(sitename)) if force or time_since_update > max_staleness: - log = "Updating stale database: {0} (last updated {1} seconds ago)" - self._logger.info(log.format(sitename, time_since_update)) + self._logger.info( + f"Updating stale database: {sitename} (last updated " + f"{time_since_update} seconds ago)" + ) self._update(sitename) else: - log = "Database for {0} is still fresh (last updated {1} seconds ago)" - self._logger.debug(log.format(sitename, time_since_update)) + self._logger.debug( + f"Database for {sitename} is still fresh (last updated " + f"{time_since_update} seconds ago)" + ) if sitename != "all": self.sync("all", force=force) - def check(self, sitename, url): - """Check whether a given URL is in the exclusions database. + def check(self, sitename: str, url: str) -> bool: + """ + Check whether a given URL is in the exclusions database. Return ``True`` if the URL is in the database, or ``False`` otherwise. """ @@ -216,19 +233,18 @@ class ExclusionsDB: else: matches = normalized.startswith(excl) if matches: - log = "Exclusion detected in {0} for {1}" - self._logger.debug(log.format(sitename, url)) + self._logger.debug(f"Exclusion detected in {sitename} for {url}") return True - log = f"No exclusions in {sitename} for {url}" - self._logger.debug(log) + self._logger.debug(f"No exclusions in {sitename} for {url}") return False - def get_mirror_hints(self, page, try_mobile=True): - """Return a list of strings that indicate the existence of a mirror. + def get_mirror_hints(self, page: Page, try_mobile: bool = True) -> list[str]: + """ + Return a list of strings that indicate the existence of a mirror. - The source parser checks for the presence of these strings inside of - certain HTML tag attributes (``"href"`` and ``"src"``). + The source parser checks for the presence of these strings inside of certain + HTML tag attributes (``"href"`` and ``"src"``). """ site = page.site path = urllib.parse.urlparse(page.url).path @@ -238,10 +254,10 @@ class ExclusionsDB: if try_mobile: fragments = re.search(r"^([\w]+)\.([\w]+).([\w]+)$", site.domain) if fragments: - roots.append("{}.m.{}.{}".format(*fragments.groups())) + roots.append(f"{fragments[1]}.m.{fragments[2]}.{fragments[3]}") general = [ - root + site._script_path + "/" + script + root + site.script_path + "/" + script for root in roots for script in scripts ] diff --git a/src/earwigbot/wiki/copyvios/markov.py b/src/earwigbot/wiki/copyvios/markov.py index 5cf7a7f..f08195c 100644 --- a/src/earwigbot/wiki/copyvios/markov.py +++ b/src/earwigbot/wiki/copyvios/markov.py @@ -18,29 +18,44 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +__all__ = [ + "DEFAULT_DEGREE", + "EMPTY", + "EMPTY_INTERSECTION", + "MarkovChain", + "MarkovChainIntersection", +] + import re +from collections.abc import Iterable +from enum import Enum -__all__ = ["EMPTY", "EMPTY_INTERSECTION", "MarkovChain", "MarkovChainIntersection"] +DEFAULT_DEGREE = 5 -class MarkovChain: - """Implements a basic ngram Markov chain of words.""" - +class Sentinel(Enum): START = -1 END = -2 - def __init__(self, text, degree=5): + +RawChain = dict[tuple[str | Sentinel, ...], int] + + +class MarkovChain: + """Implements a basic ngram Markov chain of words.""" + + def __init__(self, text: str, degree: int = DEFAULT_DEGREE) -> None: self.text = text self.degree = degree # 2 for bigrams, 3 for trigrams, etc. 
self.chain = self._build() self.size = self._get_size() - def _build(self): + def _build(self) -> RawChain: """Build and return the Markov chain from the input text.""" padding = self.degree - 1 - words = re.sub(r"[^\w\s-]", "", self.text.lower(), flags=re.UNICODE).split() - words = ([self.START] * padding) + words + ([self.END] * padding) - chain = {} + words = re.sub(r"[^\w\s-]", "", self.text.lower()).split() + words = ([Sentinel.START] * padding) + words + ([Sentinel.END] * padding) + chain: RawChain = {} for i in range(len(words) - self.degree + 1): phrase = tuple(words[i : i + self.degree]) @@ -50,15 +65,15 @@ class MarkovChain: chain[phrase] = 1 return chain - def _get_size(self): + def _get_size(self) -> int: """Return the size of the Markov chain: the total number of nodes.""" return sum(self.chain.values()) - def __repr__(self): + def __repr__(self) -> str: """Return the canonical string representation of the MarkovChain.""" return f"MarkovChain(text={self.text!r})" - def __str__(self): + def __str__(self) -> str: """Return a nice string representation of the MarkovChain.""" return f"" @@ -66,61 +81,60 @@ class MarkovChain: class MarkovChainIntersection(MarkovChain): """Implements the intersection of two chains (i.e., their shared nodes).""" - def __init__(self, mc1, mc2): + def __init__(self, mc1: MarkovChain, mc2: MarkovChain) -> None: self.mc1, self.mc2 = mc1, mc2 self.chain = self._build() self.size = self._get_size() - def _build(self): + def _build(self) -> RawChain: """Build and return the Markov chain from the input chains.""" c1 = self.mc1.chain c2 = self.mc2.chain - chain = {} + chain: RawChain = {} for phrase in c1: if phrase in c2: chain[phrase] = min(c1[phrase], c2[phrase]) return chain - def __repr__(self): + def __repr__(self) -> str: """Return the canonical string representation of the intersection.""" - res = "MarkovChainIntersection(mc1={0!r}, mc2={1!r})" - return res.format(self.mc1, self.mc2) + return f"MarkovChainIntersection(mc1={self.mc1!r}, mc2={self.mc2!r})" - def __str__(self): + def __str__(self) -> str: """Return a nice string representation of the intersection.""" - res = "" - return res.format(self.size, self.mc1, self.mc2) + return ( + f"" + ) class MarkovChainUnion(MarkovChain): """Implemented the union of multiple chains.""" - def __init__(self, chains): + def __init__(self, chains: Iterable[MarkovChain]) -> None: self.chains = list(chains) self.chain = self._build() self.size = self._get_size() - def _build(self): + def _build(self) -> RawChain: """Build and return the Markov chain from the input chains.""" - union = {} + union: RawChain = {} for chain in self.chains: - for phrase, count in chain.chain.iteritems(): + for phrase, count in chain.chain.items(): if phrase in union: union[phrase] += count else: union[phrase] = count return union - def __repr__(self): + def __repr__(self) -> str: """Return the canonical string representation of the union.""" - res = "MarkovChainUnion(chains={!r})" - return res.format(self.chains) + return f"MarkovChainUnion(chains={self.chains!r})" - def __str__(self): + def __str__(self) -> str: """Return a nice string representation of the union.""" - res = "" - return res.format(self.size, "| ".join(str(chain) for chain in self.chains)) + chains = " | ".join(str(chain) for chain in self.chains) + return f"" EMPTY = MarkovChain("") diff --git a/src/earwigbot/wiki/copyvios/parsers.py b/src/earwigbot/wiki/copyvios/parsers.py index 09553e6..dc8fcad 100644 --- a/src/earwigbot/wiki/copyvios/parsers.py +++ 
b/src/earwigbot/wiki/copyvios/parsers.py @@ -18,44 +18,34 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +from __future__ import annotations + +__all__ = ["ArticleParser", "get_parser"] + import io import json import os.path import re +import typing import urllib.parse import urllib.request +from abc import ABC, abstractmethod +from collections.abc import Callable +from typing import Any, ClassVar, Literal, TypedDict import mwparserfromhell from earwigbot.exceptions import ParserExclusionError, ParserRedirectError -__all__ = ["ArticleTextParser", "get_parser"] - - -class _BaseTextParser: - """Base class for a parser that handles text.""" - - TYPE = None - - def __init__(self, text, url=None, args=None): - self.text = text - self.url = url - self._args = args or {} - - def __repr__(self): - """Return the canonical string representation of the text parser.""" - return f"{self.__class__.__name__}(text={self.text!r})" +if typing.TYPE_CHECKING: + import bs4 - def __str__(self): - """Return a nice string representation of the text parser.""" - name = self.__class__.__name__ - return f"<{name} of text with size {len(self.text)}>" + from earwigbot.wiki.copyvios.workers import OpenedURL -class ArticleTextParser(_BaseTextParser): +class ArticleParser: """A parser that can strip and chunk wikicode article text.""" - TYPE = "Article" TEMPLATE_MERGE_THRESHOLD = 35 NLTK_DEFAULT = "english" NLTK_LANGS = { @@ -78,7 +68,18 @@ class ArticleTextParser(_BaseTextParser): "tr": "turkish", } - def _merge_templates(self, code): + def __init__(self, text: str, lang: str, nltk_dir: str) -> None: + self.text = text + self._lang = lang + self._nltk_dir = nltk_dir + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(text={self.text!r})" + + def __str__(self) -> str: + return f"<{self.__class__.__name__} of text with size {len(self.text)}>" + + def _merge_templates(self, code: mwparserfromhell.wikicode.Wikicode) -> None: """Merge template contents in to wikicode when the values are long.""" for template in code.filter_templates(recursive=code.RECURSE_OTHERS): chunks = [] @@ -92,23 +93,25 @@ class ArticleTextParser(_BaseTextParser): else: code.remove(template) - def _get_tokenizer(self): + def _get_tokenizer(self) -> Any: """Return a NLTK punctuation tokenizer for the article's language.""" import nltk - def datafile(lang): + def datafile(lang: str) -> str: return "file:" + os.path.join( - self._args["nltk_dir"], "tokenizers", "punkt", lang + ".pickle" + self._nltk_dir, "tokenizers", "punkt", lang + ".pickle" ) - lang = self.NLTK_LANGS.get(self._args.get("lang"), self.NLTK_DEFAULT) + lang = self.NLTK_LANGS.get(self._lang, self.NLTK_DEFAULT) try: nltk.data.load(datafile(self.NLTK_DEFAULT)) except LookupError: - nltk.download("punkt", self._args["nltk_dir"]) + nltk.download("punkt", self._nltk_dir) return nltk.data.load(datafile(lang)) - def _get_sentences(self, min_query, max_query, split_thresh): + def _get_sentences( + self, min_query: int, max_query: int, split_thresh: int + ) -> list[str]: """Split the article text into sentences of a certain length.""" def cut_sentence(words): @@ -138,24 +141,27 @@ class ArticleTextParser(_BaseTextParser): sentences.extend(cut_sentence(sentence.split())) return [sen for sen in sentences if len(sen) >= min_query] - def strip(self): - """Clean the page's raw text by removing templates and formatting. + def strip(self) -> str: + """ + Clean the page's raw text by removing templates and formatting. 
- Return the page's text with all HTML and wikicode formatting removed, - including templates, tables, and references. It retains punctuation - (spacing, paragraphs, periods, commas, (semi)-colons, parentheses, - quotes), original capitalization, and so forth. HTML entities are - replaced by their unicode equivalents. + Return the page's text with all HTML and wikicode formatting removed, including + templates, tables, and references. It retains punctuation (spacing, paragraphs, + periods, commas, (semi)-colons, parentheses, quotes), original capitalization, + and so forth. HTML entities are replaced by their unicode equivalents. The actual stripping is handled by :py:mod:`mwparserfromhell`. """ - def remove(code, node): - """Remove a node from a code object, ignoring ValueError. + def remove( + code: mwparserfromhell.wikicode.Wikicode, node: mwparserfromhell.nodes.Node + ) -> None: + """ + Remove a node from a code object, ignoring ValueError. - Sometimes we will remove a node that contains another node we wish - to remove, and we fail when we try to remove the inner one. Easiest - solution is to just ignore the exception. + Sometimes we will remove a node that contains another node we wish to + remove, and we fail when we try to remove the inner one. Easiest solution + is to just ignore the exception. """ try: code.remove(node) @@ -181,26 +187,32 @@ class ArticleTextParser(_BaseTextParser): self.clean = re.sub(r"\n\n+", "\n", clean).strip() return self.clean - def chunk(self, max_chunks, min_query=8, max_query=128, split_thresh=32): - """Convert the clean article text into a list of web-searchable chunks. - - No greater than *max_chunks* will be returned. Each chunk will only be - a sentence or two long at most (no more than *max_query*). The idea is - to return a sample of the article text rather than the whole, so we'll - pick and choose from parts of it, especially if the article is large - and *max_chunks* is low, so we don't end up just searching for just the - first paragraph. - - This is implemented using :py:mod:`nltk` (https://nltk.org/). A base - directory (*nltk_dir*) is required to store nltk's punctuation - database, and should be passed as an argument to the constructor. It is - typically located in the bot's working directory. + def chunk( + self, + max_chunks: int, + min_query: int = 8, + max_query: int = 128, + split_thresh: int = 32, + ) -> list[str]: + """ + Convert the clean article text into a list of web-searchable chunks. + + No greater than *max_chunks* will be returned. Each chunk will only be a + sentence or two long at most (no more than *max_query*). The idea is to return + a sample of the article text rather than the whole, so we'll pick and choose + from parts of it, especially if the article is large and *max_chunks* is low, + so we don't end up just searching for just the first paragraph. + + This is implemented using :py:mod:`nltk` (https://nltk.org/). A base directory + (*nltk_dir*) is required to store nltk's punctuation database, and should be + passed as an argument to the constructor. It is typically located in the bot's + working directory. 
""" sentences = self._get_sentences(min_query, max_query, split_thresh) if len(sentences) <= max_chunks: return sentences - chunks = [] + chunks: list[str] = [] while len(chunks) < max_chunks: if len(chunks) % 5 == 0: chunk = sentences.pop(0) # Pop from beginning @@ -216,7 +228,8 @@ class ArticleTextParser(_BaseTextParser): return chunks def get_links(self): - """Return a list of all external links in the article. + """ + Return a list of all external links in the article. The list is restricted to things that we suspect we can parse: i.e., those with schemes of ``http`` and ``https``. @@ -226,14 +239,42 @@ class ArticleTextParser(_BaseTextParser): return [str(link.url) for link in links if link.url.startswith(schemes)] -class _HTMLParser(_BaseTextParser): +class ParserArgs(TypedDict, total=False): + mirror_hints: list[str] + open_url: Callable[[str], OpenedURL | None] + + +class SourceParser(ABC): + """Base class for a parser that handles text.""" + + TYPE: ClassVar[str] + + def __init__(self, text: bytes, url: str, args: ParserArgs | None = None) -> None: + self.text = text + self.url = url + self._args = args or {} + + def __repr__(self) -> str: + """Return the canonical string representation of the text parser.""" + return f"{self.__class__.__name__}(text={self.text!r})" + + def __str__(self) -> str: + """Return a nice string representation of the text parser.""" + return f"<{self.__class__.__name__} of text with size {len(self.text)}>" + + @abstractmethod + def parse(self) -> str: ... + + +class HTMLParser(SourceParser): """A parser that can extract the text from an HTML document.""" TYPE = "HTML" hidden_tags = ["script", "style"] - def _fail_if_mirror(self, soup): - """Look for obvious signs that the given soup is a wiki mirror. + def _fail_if_mirror(self, soup: bs4.BeautifulSoup) -> None: + """ + Look for obvious signs that the given soup is a wiki mirror. If so, raise ParserExclusionError, which is caught in the workers and causes this source to excluded. @@ -242,13 +283,14 @@ class _HTMLParser(_BaseTextParser): return def func(attr): + assert "mirror_hints" in self._args return attr and any(hint in attr for hint in self._args["mirror_hints"]) if soup.find_all(href=func) or soup.find_all(src=func): raise ParserExclusionError() @staticmethod - def _get_soup(text): + def _get_soup(text: bytes) -> bs4.BeautifulSoup: """Parse some text using BeautifulSoup.""" import bs4 @@ -257,11 +299,11 @@ class _HTMLParser(_BaseTextParser): except ValueError: return bs4.BeautifulSoup(text) - def _clean_soup(self, soup): + def _clean_soup(self, soup: bs4.element.Tag) -> str: """Clean a BeautifulSoup tree of invisible tags.""" import bs4 - def is_comment(text): + def is_comment(text: bs4.element.Tag) -> bool: return isinstance(text, bs4.element.Comment) for comment in soup.find_all(text=is_comment): @@ -272,7 +314,7 @@ class _HTMLParser(_BaseTextParser): return "\n".join(s.replace("\n", " ") for s in soup.stripped_strings) - def _open(self, url, **kwargs): + def _open(self, url: str, **kwargs: Any) -> bytes | None: """Try to read a URL. 
Return None if it couldn't be read.""" opener = self._args.get("open_url") if not opener: @@ -280,13 +322,13 @@ class _HTMLParser(_BaseTextParser): result = opener(url, **kwargs) return result.content if result else None - def _load_from_blogspot(self, url): + def _load_from_blogspot(self, url: urllib.parse.ParseResult) -> str: """Load dynamic content from Blogger Dynamic Views.""" - match = re.search(r"'postId': '(\d+)'", self.text) + match = re.search(rb"'postId': '(\d+)'", self.text) if not match: return "" post_id = match.group(1) - url = f"https://{url.netloc}/feeds/posts/default/{post_id}?" + feed_url = f"https://{url.netloc}/feeds/posts/default/{post_id}?" params = { "alt": "json", "v": "2", @@ -294,7 +336,7 @@ class _HTMLParser(_BaseTextParser): "rewriteforssl": "true", } raw = self._open( - url + urllib.parse.urlencode(params), + feed_url + urllib.parse.urlencode(params), allow_content_types=["application/json"], ) if raw is None: @@ -308,19 +350,24 @@ class _HTMLParser(_BaseTextParser): except KeyError: return "" soup = self._get_soup(text) + if not soup.body: + return "" return self._clean_soup(soup.body) - def parse(self): - """Return the actual text contained within an HTML document. + def parse(self) -> str: + """ + Return the actual text contained within an HTML document. Implemented using :py:mod:`BeautifulSoup ` - (https://www.crummy.com/software/BeautifulSoup/). + (https://pypi.org/project/beautifulsoup4/). """ + import bs4 + url = urllib.parse.urlparse(self.url) if self.url else None soup = self._get_soup(self.text) if not soup.body: - # No tag present in HTML -> - # no scrapable content (possibly JS or
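A minimal usage sketch of the refactored API introduced by this patch, assuming a Page already obtained from a configured Site; the check() helper and its cached_urls parameter are hypothetical, added here only to show how run_check and run_compare differ.

import logging

from earwigbot.wiki.copyvios import CopyvioChecker


def check(page, cached_urls=None):
    # Run a full search-engine-backed check, or a direct comparison if we
    # already know which URLs to compare against (e.g. from a cache).
    checker = CopyvioChecker(
        page,
        min_confidence=0.75,
        max_time=30,
        logger=logging.getLogger("earwigbot.wiki"),
    )
    if cached_urls:
        return checker.run_compare(cached_urls)
    return checker.run_check(max_queries=15, short_circuit=True)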